FormatToken.cpp source code [clang/lib/Format/FormatToken.cpp]

1	//===--- FormatToken.cpp - Format C++ code --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements specific functions of \c FormatTokens and their
11	/// roles.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "FormatToken.h"
16	#include "ContinuationIndenter.h"
17	#include "llvm/ADT/SmallVector.h"
18	#include "llvm/Support/Debug.h"
19	#include <climits>
20
21	namespace clang {
22	namespace format {
23
24	const char *getTokenTypeName(TokenType Type) {
25	static const char *const TokNames[] = {
26	#define TYPE(X) #X,
27	LIST_TOKEN_TYPES
28	#undef TYPE
29	nullptr};
30
31	if (Type < NUM_TOKEN_TYPES)
32	return TokNames[Type];
33	llvm_unreachable("unknown TokenType");
34	return nullptr;
35	}
36
37	// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38	// duplication.
39	bool FormatToken::isSimpleTypeSpecifier() const {
40	switch (Tok.getKind()) {
41	case tok::kw_short:
42	case tok::kw_long:
43	case tok::kw___int64:
44	case tok::kw___int128:
45	case tok::kw_signed:
46	case tok::kw_unsigned:
47	case tok::kw_void:
48	case tok::kw_char:
49	case tok::kw_int:
50	case tok::kw_half:
51	case tok::kw_float:
52	case tok::kw_double:
53	case tok::kw___bf16:
54	case tok::kw__Float16:
55	case tok::kw___float128:
56	case tok::kw___ibm128:
57	case tok::kw_wchar_t:
58	case tok::kw_bool:
59	#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
60	#include "clang/Basic/TransformTypeTraits.def"
61	case tok::annot_typename:
62	case tok::kw_char8_t:
63	case tok::kw_char16_t:
64	case tok::kw_char32_t:
65	case tok::kw_typeof:
66	case tok::kw_decltype:
67	case tok::kw__Atomic:
68	return true;
69	default:
70	return false;
71	}
72	}
73
74	// Sorted common C++ non-keyword types.
75	static SmallVector<StringRef> CppNonKeywordTypes = {
76	"clock_t", "int16_t", "int32_t", "int64_t", "int8_t",
77	"intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t",
78	"uint32_t", "uint64_t", "uint8_t", "uintptr_t",
79	};
80
81	bool FormatToken::isTypeName(bool IsCpp) const {
82	return is(TT: TT_TypeName) \|\| isSimpleTypeSpecifier() \|\|
83	(IsCpp && is(Kind: tok::identifier) &&
84	std::binary_search(first: CppNonKeywordTypes.begin(),
85	last: CppNonKeywordTypes.end(), val: TokenText));
86	}
87
88	bool FormatToken::isTypeOrIdentifier(bool IsCpp) const {
89	return isTypeName(IsCpp) \|\| isOneOf(K1: tok::kw_auto, K2: tok::identifier);
90	}
91
92	bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
93	assert(is(tok::r_brace));
94	if (!Style.Cpp11BracedListStyle \|\|
95	Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
96	return false;
97	}
98	const auto *LBrace = MatchingParen;
99	assert(LBrace && LBrace->is(tok::l_brace));
100	if (LBrace->is(BBK: BK_BracedInit))
101	return true;
102	if (LBrace->Previous && LBrace->Previous->is(Kind: tok::equal))
103	return true;
104	return false;
105	}
106
107	bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
108	// C# Does not indent object initialisers as continuations.
109	if (is(Kind: tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
110	return true;
111	if (is(TT: TT_TemplateString) && opensScope())
112	return true;
113	return is(TT: TT_ArrayInitializerLSquare) \|\| is(TT: TT_ProtoExtensionLSquare) \|\|
114	(is(Kind: tok::l_brace) &&
115	(getBlockKind() == BK_Block \|\| is(TT: TT_DictLiteral) \|\|
116	(!Style.Cpp11BracedListStyle && NestingLevel == `0`))) \|\|
117	(is(Kind: tok::less) && Style.isProto());
118	}
119
120	TokenRole::~TokenRole() {}
121
122	void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
123
124	unsigned CommaSeparatedList::formatAfterToken(LineState &State,
125	ContinuationIndenter *Indenter,
126	bool DryRun) {
127	if (!State.NextToken \|\| !State.NextToken->Previous)
128	return `0`;
129
130	if (Formats.size() <= `1`)
131	return `0`; // Handled by formatFromToken (1) or avoid severe penalty (0).
132
133	// Ensure that we start on the opening brace.
134	const FormatToken *LBrace =
135	State.NextToken->Previous->getPreviousNonComment();
136	if (!LBrace \|\| !LBrace->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare) \|\|
137	LBrace->is(BBK: BK_Block) \|\| LBrace->is(TT: TT_DictLiteral) \|\|
138	LBrace->Next->is(TT: TT_DesignatedInitializerPeriod)) {
139	return `0`;
140	}
141
142	// Calculate the number of code points we have to format this list. As the
143	// first token is already placed, we have to subtract it.
144	unsigned RemainingCodePoints =
145	Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
146
147	// Find the best ColumnFormat, i.e. the best number of columns to use.
148	const ColumnFormat *Format = getColumnFormat(RemainingCharacters: RemainingCodePoints);
149
150	// If no ColumnFormat can be used, the braced list would generally be
151	// bin-packed. Add a severe penalty to this so that column layouts are
152	// preferred if possible.
153	if (!Format)
154	return `10'000`;
155
156	// Format the entire list.
157	unsigned Penalty = `0`;
158	unsigned Column = `0`;
159	unsigned Item = `0`;
160	while (State.NextToken != LBrace->MatchingParen) {
161	bool NewLine = false;
162	unsigned ExtraSpaces = `0`;
163
164	// If the previous token was one of our commas, we are now on the next item.
165	if (Item < Commas.size() && State.NextToken->Previous == Commas [Item]) {
166	if (!State.NextToken->isTrailingComment()) {
167	ExtraSpaces += Format->ColumnSizes [Column] - ItemLengths [Item];
168	++Column;
169	}
170	++Item;
171	}
172
173	if (Column == Format->Columns \|\| State.NextToken->MustBreakBefore) {
174	Column = `0`;
175	NewLine = true;
176	}
177
178	// Place token using the continuation indenter and store the penalty.
179	Penalty += Indenter->addTokenToState(State, Newline: NewLine, DryRun, ExtraSpaces);
180	}
181	return Penalty;
182	}
183
184	unsigned CommaSeparatedList::formatFromToken(LineState &State,
185	ContinuationIndenter *Indenter,
186	bool DryRun) {
187	// Formatting with 1 Column isn't really a column layout, so we don't need the
188	// special logic here. We can just avoid bin packing any of the parameters.
189	if (Formats.size() == `1` \|\| HasNestedBracedList)
190	State.Stack.back().AvoidBinPacking = true;
191	return `0`;
192	}
193
194	// Returns the lengths in code points between Begin and End (both included),
195	// assuming that the entire sequence is put on a single line.
196	static unsigned CodePointsBetween(const FormatToken *Begin,
197	const FormatToken *End) {
198	assert(End->TotalLength >= Begin->TotalLength);
199	return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
200	}
201
202	void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
203	// FIXME: At some point we might want to do this for other lists, too.
204	if (!Token->MatchingParen \|\|
205	!Token->isOneOf(K1: tok::l_brace, K2: TT_ArrayInitializerLSquare)) {
206	return;
207	}
208
209	// In C++11 braced list style, we should not format in columns unless they
210	// have many items (20 or more) or we allow bin-packing of function call
211	// arguments.
212	if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
213	Commas.size() < `19`) {
214	return;
215	}
216
217	// Limit column layout for JavaScript array initializers to 20 or more items
218	// for now to introduce it carefully. We can become more aggressive if this
219	// necessary.
220	if (Token->is(TT: TT_ArrayInitializerLSquare) && Commas.size() < `19`)
221	return;
222
223	// Column format doesn't really make sense if we don't align after brackets.
224	if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
225	return;
226
227	FormatToken *ItemBegin = Token->Next;
228	while (ItemBegin->isTrailingComment())
229	ItemBegin = ItemBegin->Next;
230	SmallVector<bool, `8`> MustBreakBeforeItem;
231
232	// The lengths of an item if it is put at the end of the line. This includes
233	// trailing comments which are otherwise ignored for column alignment.
234	SmallVector<unsigned, `8`> EndOfLineItemLength;
235	MustBreakBeforeItem.reserve(N: Commas.size() + `1`);
236	EndOfLineItemLength.reserve(N: Commas.size() + `1`);
237	ItemLengths.reserve(N: Commas.size() + `1`);
238
239	bool HasSeparatingComment = false;
240	for (unsigned i = `0`, e = Commas.size() + `1`; i != e; ++i) {
241	assert(ItemBegin);
242	// Skip comments on their own line.
243	while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
244	ItemBegin = ItemBegin->Next;
245	HasSeparatingComment = i > `0`;
246	}
247
248	MustBreakBeforeItem.push_back(Elt: ItemBegin->MustBreakBefore);
249	if (ItemBegin->is(Kind: tok::l_brace))
250	HasNestedBracedList = true;
251	const FormatToken ItemEnd = nullptr*;
252	if (i == Commas.size()) {
253	ItemEnd = Token->MatchingParen;
254	const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
255	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: NonCommentEnd));
256	if (Style.Cpp11BracedListStyle &&
257	!ItemEnd->Previous->isTrailingComment()) {
258	// In Cpp11 braced list style, the } and possibly other subsequent
259	// tokens will need to stay on a line with the last element.
260	while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
261	ItemEnd = ItemEnd->Next;
262	} else {
263	// In other braced lists styles, the "}" can be wrapped to the new line.
264	ItemEnd = Token->MatchingParen->Previous;
265	}
266	} else {
267	ItemEnd = Commas [i];
268	// The comma is counted as part of the item when calculating the length.
269	ItemLengths.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
270
271	// Consume trailing comments so the are included in EndOfLineItemLength.
272	if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
273	ItemEnd->Next->isTrailingComment()) {
274	ItemEnd = ItemEnd->Next;
275	}
276	}
277	EndOfLineItemLength.push_back(Elt: CodePointsBetween(Begin: ItemBegin, End: ItemEnd));
278	// If there is a trailing comma in the list, the next item will start at the
279	// closing brace. Don't create an extra item for this.
280	if (ItemEnd->getNextNonComment() == Token->MatchingParen)
281	break;
282	ItemBegin = ItemEnd->Next;
283	}
284
285	// Don't use column layout for lists with few elements and in presence of
286	// separating comments.
287	if (Commas.size() < `5` \|\| HasSeparatingComment)
288	return;
289
290	if (Token->NestingLevel != `0` && Token->is(Kind: tok::l_brace) && Commas.size() < `19`)
291	return;
292
293	// We can never place more than ColumnLimit / 3 items in a row (because of the
294	// spaces and the comma).
295	unsigned MaxItems = Style.ColumnLimit / `3`;
296	SmallVector<unsigned> MinSizeInColumn;
297	MinSizeInColumn.reserve(N: MaxItems);
298	for (unsigned Columns = `1`; Columns <= MaxItems; ++Columns) {
299	ColumnFormat Format;
300	Format.Columns = Columns;
301	Format.ColumnSizes.resize(N: Columns);
302	MinSizeInColumn.assign(NumElts: Columns, UINT_MAX);
303	Format.LineCount = `1`;
304	bool HasRowWithSufficientColumns = false;
305	unsigned Column = `0`;
306	for (unsigned i = `0`, e = ItemLengths.size(); i != e; ++i) {
307	assert(i < MustBreakBeforeItem.size());
308	if (MustBreakBeforeItem [i] \|\| Column == Columns) {
309	++Format.LineCount;
310	Column = `0`;
311	}
312	if (Column == Columns - `1`)
313	HasRowWithSufficientColumns = true;
314	unsigned Length =
315	(Column == Columns - `1`) ? EndOfLineItemLength [i] : ItemLengths [i];
316	Format.ColumnSizes [Column] = std::max(a: Format.ColumnSizes [Column], b: Length);
317	MinSizeInColumn [Column] = std::min(a: MinSizeInColumn [Column], b: Length);
318	++Column;
319	}
320	// If all rows are terminated early (e.g. by trailing comments), we don't
321	// need to look further.
322	if (!HasRowWithSufficientColumns)
323	break;
324	Format.TotalWidth = Columns - `1`; // Width of the N-1 spaces.
325
326	for (unsigned i = `0`; i < Columns; ++i)
327	Format.TotalWidth += Format.ColumnSizes [i];
328
329	// Don't use this Format, if the difference between the longest and shortest
330	// element in a column exceeds a threshold to avoid excessive spaces.
331	if ([&] {
332	for (unsigned i = `0`; i < Columns - `1`; ++i)
333	if (Format.ColumnSizes [i] - MinSizeInColumn [i] > `10`)
334	return true;
335	return false;
336	}()) {
337	continue;
338	}
339
340	// Ignore layouts that are bound to violate the column limit.
341	if (Format.TotalWidth > Style.ColumnLimit && Columns > `1`)
342	continue;
343
344	Formats.push_back(Elt: Format);
345	}
346	}
347
348	const CommaSeparatedList::ColumnFormat *
349	CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
350	const ColumnFormat BestFormat = nullptr*;
351	for (const ColumnFormat &Format : llvm::reverse(C: Formats)) {
352	if (Format.TotalWidth <= RemainingCharacters \|\| Format.Columns == `1`) {
353	if (BestFormat && Format.LineCount > BestFormat->LineCount)
354	break;
355	BestFormat = &Format;
356	}
357	}
358	return BestFormat;
359	}
360
361	} // namespace format
362	} // namespace clang
363

source code of clang/lib/Format/FormatToken.cpp