BreakableToken.cpp source code [clang/lib/Format/BreakableToken.cpp]

1	//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// Contains implementation of BreakableToken class and classes derived
11	/// from it.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#include "BreakableToken.h"
16	#include "ContinuationIndenter.h"
17	#include "clang/Basic/CharInfo.h"
18	#include "clang/Format/Format.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/Support/Debug.h"
21	#include <algorithm>
22
23	#define DEBUG_TYPE "format-token-breaker"
24
25	namespace clang {
26	namespace format {
27
28	static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is a space, tab, vertical tab, form feed or carriage
/// return; false for every other character (including '\n').
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
41
42	static StringRef getLineCommentIndentPrefix(StringRef Comment,
43	const FormatStyle &Style) {
44	static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45	"//!", "//:", "//"};
46	static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47	"//", "#"};
48	ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
49	if (Style.isTextProto())
50	KnownPrefixes = KnownTextProtoPrefixes;
51
52	assert(
53	llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54	return Lhs.size() > Rhs.size();
55	}));
56
57	for (StringRef KnownPrefix : KnownPrefixes) {
58	if (Comment.starts_with(Prefix: KnownPrefix)) {
59	const auto PrefixLength =
60	Comment.find_first_not_of(C: `' '`, From: KnownPrefix.size());
61	return Comment.substr(Start: `0`, N: PrefixLength);
62	}
63	}
64	return {};
65	}
66
67	static BreakableToken::Split
68	getCommentSplit(StringRef Text, unsigned ContentStartColumn,
69	unsigned ColumnLimit, unsigned TabWidth,
70	encoding::Encoding Encoding, const FormatStyle &Style,
71	bool DecorationEndsWithStar = false) {
72	LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73	<< "\", Column limit: " << ColumnLimit
74	<< ", Content start: " << ContentStartColumn << "\n");
75	if (ColumnLimit <= ContentStartColumn + `1`)
76	return BreakableToken::Split (StringRef::npos, `0`);
77
78	unsigned MaxSplit = ColumnLimit - ContentStartColumn + `1`;
79	unsigned MaxSplitBytes = `0`;
80
81	for (unsigned NumChars = `0`;
82	NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83	unsigned BytesInChar =
84	encoding::getCodePointNumBytes(FirstChar: Text [MaxSplitBytes], Encoding);
85	NumChars += encoding::columnWidthWithTabs(
86	Text: Text.substr(Start: MaxSplitBytes, N: BytesInChar), StartColumn: ContentStartColumn + NumChars,
87	TabWidth, Encoding);
88	MaxSplitBytes += BytesInChar;
89	}
90
91	// In JavaScript, some @tags can be followed by {, and machinery that parses
92	// these comments will fail to understand the comment if followed by a line
93	// break. So avoid ever breaking before a {.
94	if (Style.isJavaScript()) {
95	StringRef::size_type SpaceOffset =
96	Text.find_first_of(Chars: Blanks, From: MaxSplitBytes);
97	if (SpaceOffset != StringRef::npos && SpaceOffset + `1` < Text.size() &&
98	Text [SpaceOffset + `1`] == `'{'`) {
99	MaxSplitBytes = SpaceOffset + `1`;
100	}
101	}
102
103	StringRef::size_type SpaceOffset = Text.find_last_of(Chars: Blanks, From: MaxSplitBytes);
104
105	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\.");
106	// Some spaces are unacceptable to break on, rewind past them.
107	while (SpaceOffset != StringRef::npos) {
108	// If a line-comment ends with `\`, the next line continues the comment,
109	// whether or not it starts with `//`. This is confusing and triggers
110	// -Wcomment.
111	// Avoid introducing multiline comments by not allowing a break right
112	// after '\'.
113	if (Style.isCpp()) {
114	StringRef::size_type LastNonBlank =
115	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset);
116	if (LastNonBlank != StringRef::npos && Text [LastNonBlank] == `'\\'`) {
117	SpaceOffset = Text.find_last_of(Chars: Blanks, From: LastNonBlank);
118	continue;
119	}
120	}
121
122	// Do not split before a number followed by a dot: this would be interpreted
123	// as a numbered list, which would prevent re-flowing in subsequent passes.
124	if (kNumberedListRegexp.match(String: Text.substr(Start: SpaceOffset).ltrim(Chars: Blanks))) {
125	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
126	continue;
127	}
128
129	// Avoid ever breaking before a @tag or a { in JavaScript.
130	if (Style.isJavaScript() && SpaceOffset + `1` < Text.size() &&
131	(Text [SpaceOffset + `1`] == `'{'` \|\| Text [SpaceOffset + `1`] == `'@'`)) {
132	SpaceOffset = Text.find_last_of(Chars: Blanks, From: SpaceOffset);
133	continue;
134	}
135
136	break;
137	}
138
139	if (SpaceOffset == StringRef::npos \|\|
140	// Don't break at leading whitespace.
141	Text.find_last_not_of(Chars: Blanks, From: SpaceOffset) == StringRef::npos) {
142	// Make sure that we don't break at leading whitespace that
143	// reaches past MaxSplit.
144	StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Chars: Blanks);
145	if (FirstNonWhitespace == StringRef::npos) {
146	// If the comment is only whitespace, we cannot split.
147	return BreakableToken::Split (StringRef::npos, `0`);
148	}
149	SpaceOffset = Text.find_first_of(
150	Chars: Blanks, From: std::max<unsigned>(a: MaxSplitBytes, b: FirstNonWhitespace));
151	}
152	if (SpaceOffset != StringRef::npos && SpaceOffset != `0`) {
153	// adaptStartOfLine will break after lines starting with /* if the comment*
154	// is broken anywhere. Avoid emitting this break twice here.
155	// Example: in /* longtextcomesherethatbreaks / (with ColumnLimit 20) will
156	// insert a break after /, so this code must not insert the same break.
157	if (SpaceOffset == `1` && Text [SpaceOffset - `1`] == `'*'`)
158	return BreakableToken::Split (StringRef::npos, `0`);
159	StringRef BeforeCut = Text.substr(Start: `0`, N: SpaceOffset).rtrim(Chars: Blanks);
160	StringRef AfterCut = Text.substr(Start: SpaceOffset);
161	// Don't trim the leading blanks if it would create a / after the break.*
162	if (!DecorationEndsWithStar \|\| AfterCut.size() <= `1` \|\| AfterCut [`1`] != `'/'`)
163	AfterCut = AfterCut.ltrim(Chars: Blanks);
164	return BreakableToken::Split (BeforeCut.size(),
165	AfterCut.begin() - BeforeCut.end());
166	}
167	return BreakableToken::Split (StringRef::npos, `0`);
168	}
169
170	static BreakableToken::Split
171	getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
172	unsigned TabWidth, encoding::Encoding Encoding) {
173	// FIXME: Reduce unit test case.
174	if (Text.empty())
175	return BreakableToken::Split (StringRef::npos, `0`);
176	if (ColumnLimit <= UsedColumns)
177	return BreakableToken::Split (StringRef::npos, `0`);
178	unsigned MaxSplit = ColumnLimit - UsedColumns;
179	StringRef::size_type SpaceOffset = `0`;
180	StringRef::size_type SlashOffset = `0`;
181	StringRef::size_type WordStartOffset = `0`;
182	StringRef::size_type SplitPoint = `0`;
183	for (unsigned Chars = `0`;;) {
184	unsigned Advance;
185	if (Text [`0`] == `'\\'`) {
186	Advance = encoding::getEscapeSequenceLength(Text);
187	Chars += Advance;
188	} else {
189	Advance = encoding::getCodePointNumBytes(FirstChar: Text [`0`], Encoding);
190	Chars += encoding::columnWidthWithTabs(
191	Text: Text.substr(Start: `0`, N: Advance), StartColumn: UsedColumns + Chars, TabWidth, Encoding);
192	}
193
194	if (Chars > MaxSplit \|\| Text.size() <= Advance)
195	break;
196
197	if (IsBlank(C: Text [`0`]))
198	SpaceOffset = SplitPoint;
199	if (Text [`0`] == `'/'`)
200	SlashOffset = SplitPoint;
201	if (Advance == `1` && !isAlphanumeric(c: Text [`0`]))
202	WordStartOffset = SplitPoint;
203
204	SplitPoint += Advance;
205	Text = Text.substr(Start: Advance);
206	}
207
208	if (SpaceOffset != `0`)
209	return BreakableToken::Split (SpaceOffset + `1`, `0`);
210	if (SlashOffset != `0`)
211	return BreakableToken::Split (SlashOffset + `1`, `0`);
212	if (WordStartOffset != `0`)
213	return BreakableToken::Split (WordStartOffset + `1`, `0`);
214	if (SplitPoint != `0`)
215	return BreakableToken::Split (SplitPoint, `0`);
216	return BreakableToken::Split (StringRef::npos, `0`);
217	}
218
219	bool switchesFormatting(const FormatToken &Token) {
220	assert((Token.is(TT_BlockComment) \|\| Token.is(TT_LineComment)) &&
221	"formatting regions are switched by comment tokens");
222	StringRef Content = Token.TokenText.substr(Start: `2`).ltrim();
223	return Content.starts_with(Prefix: "clang-format on") \|\|
224	Content.starts_with(Prefix: "clang-format off");
225	}
226
227	unsigned
228	BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229	Split Split) const {
230	// Example: consider the content
231	// lala lala
232	// - RemainingTokenColumns is the original number of columns, 10;
233	// - Split is (4, 2), denoting the two spaces between the two words;
234	//
235	// We compute the number of columns when the split is compressed into a single
236	// space, like:
237	// lala lala
238	//
239	// FIXME: Correctly measure the length of whitespace in Split.second so it
240	// works with tabs.
241	return RemainingTokenColumns + `1` - Split.second;
242	}
243
244	unsigned BreakableStringLiteral::getLineCount() const { return `1`; }
245
246	unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247	unsigned Offset,
248	StringRef::size_type Length,
249	unsigned StartColumn) const {
250	llvm_unreachable("Getting the length of a part of the string literal "
251	"indicates that the code tries to reflow it.");
252	}
253
254	unsigned
255	BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
256	unsigned StartColumn) const {
257	return UnbreakableTailLength + Postfix.size() +
258	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
259	TabWidth: Style.TabWidth, Encoding);
260	}
261
262	unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
263	bool Break) const {
264	return StartColumn + Prefix.size();
265	}
266
267	BreakableStringLiteral::BreakableStringLiteral(
268	const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269	StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
270	encoding::Encoding Encoding, const FormatStyle &Style)
271	: BreakableToken (Tok, InPPDirective, Encoding, Style),
272	StartColumn(StartColumn), Prefix (Prefix), Postfix (Postfix),
273	UnbreakableTailLength(UnbreakableTailLength) {
274	assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
275	Line = Tok.TokenText.substr(
276	Start: Prefix.size(), N: Tok.TokenText.size() - Prefix.size() - Postfix.size());
277	}
278
279	BreakableToken::Split BreakableStringLiteral::getSplit(
280	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
282	return getStringSplit(Text: Line.substr(Start: TailOffset), UsedColumns: ContentStartColumn,
283	ColumnLimit: ColumnLimit - Postfix.size(), TabWidth: Style.TabWidth, Encoding);
284	}
285
286	void BreakableStringLiteral::insertBreak(unsigned LineIndex,
287	unsigned TailOffset, Split Split,
288	unsigned ContentIndent,
289	WhitespaceManager &Whitespaces) const {
290	Whitespaces.replaceWhitespaceInToken(
291	Tok, Offset: Prefix.size() + TailOffset + Split.first, ReplaceChars: Split.second, PreviousPostfix: Postfix,
292	CurrentPrefix: Prefix, InPPDirective, Newlines: `1`, Spaces: StartColumn);
293	}
294
295	BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
296	const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
297	unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
298	encoding::Encoding Encoding, const FormatStyle &Style)
299	: BreakableStringLiteral (
300	Tok, StartColumn, /Prefix=/QuoteStyle == SingleQuotes ? "'"
301	: QuoteStyle == AtDoubleQuotes ? "@\""
302	: "\"",
303	/Postfix=/QuoteStyle == SingleQuotes ? "'" : "\"",
304	UnbreakableTailLength, InPPDirective, Encoding, Style),
305	BracesNeeded(Tok.isNot(Kind: TT_StringInConcatenation)),
306	QuoteStyle(QuoteStyle) {
307	// Find the replacement text for inserting braces and quotes and line breaks.
308	// We don't create an allocated string concatenated from parts here because it
309	// has to outlive the BreakableStringliteral object. The brace replacements
310	// include a quote so that WhitespaceManager can tell it apart from whitespace
311	// replacements between the string and surrounding tokens.
312
313	// The option is not implemented in JavaScript.
314	bool SignOnNewLine =
315	!Style.isJavaScript() &&
316	Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
317
318	if (Style.isVerilog()) {
319	// In Verilog, all strings are quoted by double quotes, joined by commas,
320	// and wrapped in braces. The comma is always before the newline.
321	assert(QuoteStyle == DoubleQuotes);
322	LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
323	RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
324	Postfix = "\",";
325	Prefix = "\"";
326	} else {
327	// The plus sign may be on either line. And also C# and JavaScript have
328	// several quoting styles.
329	if (QuoteStyle == SingleQuotes) {
330	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
331	RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
332	Postfix = SignOnNewLine ? "'" : "' +";
333	Prefix = SignOnNewLine ? "+ '" : "'";
334	} else {
335	if (QuoteStyle == AtDoubleQuotes) {
336	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
337	Prefix = SignOnNewLine ? "+ @\"" : "@\"";
338	} else {
339	LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
340	Prefix = SignOnNewLine ? "+ \"" : "\"";
341	}
342	RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
343	Postfix = SignOnNewLine ? "\"" : "\" +";
344	}
345	}
346
347	// Following lines are indented by the width of the brace and space if any.
348	ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - `1` : `0`;
349	// The plus sign may need to be unindented depending on the style.
350	// FIXME: Add support for DontAlign.
351	if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
352	Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
353	ContinuationIndent -= `2`;
354	}
355	}
356
357	unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
358	unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
359	return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : `1`) +
360	encoding::columnWidthWithTabs(Text: Line.substr(Start: Offset), StartColumn,
361	TabWidth: Style.TabWidth, Encoding);
362	}
363
364	unsigned
365	BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
366	bool Break) const {
367	return std::max(
368	a: `0`,
369	b: static_cast<int>(StartColumn) +
370	(Break ? ContinuationIndent + static_cast<int>(Prefix.size())
371	: (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - `1`
372	: `0`) +
373	(QuoteStyle == AtDoubleQuotes ? `2` : `1`)));
374	}
375
376	void BreakableStringLiteralUsingOperators::insertBreak(
377	unsigned LineIndex, unsigned TailOffset, Split Split,
378	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
379	Whitespaces.replaceWhitespaceInToken(
380	Tok, /Offset=/(QuoteStyle == AtDoubleQuotes ? `2` : `1`) + TailOffset +
381	Split.first,
382	/ReplaceChars=/Split.second, /PreviousPostfix=/Postfix,
383	/CurrentPrefix=/Prefix, InPPDirective, /NewLines=/Newlines: `1`,
384	/Spaces=/
385	std::max(a: `0`, b: static_cast<int>(StartColumn) + ContinuationIndent));
386	}
387
388	void BreakableStringLiteralUsingOperators::updateAfterBroken(
389	WhitespaceManager &Whitespaces) const {
390	// Add the braces required for breaking the token if they are needed.
391	if (!BracesNeeded)
392	return;
393
394	// To add a brace or parenthesis, we replace the quote (or the at sign) with a
395	// brace and another quote. This is because the rest of the program requires
396	// one replacement for each source range. If we replace the empty strings
397	// around the string, it may conflict with whitespace replacements between the
398	// string and adjacent tokens.
399	Whitespaces.replaceWhitespaceInToken(
400	Tok, /Offset=/`0`, /ReplaceChars=/`1`, /PreviousPostfix=/"",
401	/CurrentPrefix=/LeftBraceQuote, InPPDirective, /NewLines=/Newlines: `0`,
402	/Spaces=/`0`);
403	Whitespaces.replaceWhitespaceInToken(
404	Tok, /Offset=/Tok.TokenText.size() - `1`, /ReplaceChars=/`1`,
405	/PreviousPostfix=/RightBraceQuote,
406	/CurrentPrefix=/"", InPPDirective, /NewLines=/Newlines: `0`, /Spaces=/`0`);
407	}
408
409	BreakableComment::BreakableComment(const FormatToken &Token,
410	unsigned StartColumn, bool InPPDirective,
411	encoding::Encoding Encoding,
412	const FormatStyle &Style)
413	: BreakableToken (Token, InPPDirective, Encoding, Style),
414	StartColumn(StartColumn) {}
415
416	unsigned BreakableComment::getLineCount() const { return Lines.size(); }
417
418	BreakableToken::Split
419	BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
420	unsigned ColumnLimit, unsigned ContentStartColumn,
421	const llvm::Regex &CommentPragmasRegex) const {
422	// Don't break lines matching the comment pragmas regex.
423	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
424	return Split (StringRef::npos, `0`);
425	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
426	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
427	Encoding, Style);
428	}
429
430	void BreakableComment::compressWhitespace(
431	unsigned LineIndex, unsigned TailOffset, Split Split,
432	WhitespaceManager &Whitespaces) const {
433	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
434	// Text is relative to the content line, but Whitespaces operates relative to
435	// the start of the corresponding token, so compute the start of the Split
436	// that needs to be compressed into a single space relative to the start of
437	// its token.
438	unsigned BreakOffsetInToken =
439	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
440	unsigned CharsToRemove = Split.second;
441	Whitespaces.replaceWhitespaceInToken(
442	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "", CurrentPrefix: "",
443	/InPPDirective=/false, /Newlines=/`0`, /Spaces=/`1`);
444	}
445
446	const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
447	return Tokens [LineIndex] ? *Tokens [LineIndex] : Tok;
448	}
449
450	static bool mayReflowContent(StringRef Content) {
451	Content = Content.trim(Chars: Blanks);
452	// Lines starting with '@' or '\' commonly have special meaning.
453	// Lines starting with '-', '-#', '+' or '' are bulleted/numbered lists.*
454	bool hasSpecialMeaningPrefix = false;
455	for (StringRef Prefix :
456	{"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
457	if (Content.starts_with(Prefix)) {
458	hasSpecialMeaningPrefix = true;
459	break;
460	}
461	}
462
463	// Numbered lists may also start with a number followed by '.'
464	// To avoid issues if a line starts with a number which is actually the end
465	// of a previous line, we only consider numbers with up to 2 digits.
466	static const auto kNumberedListRegexp = llvm::Regex ("^[1-9][0-9]?\\. ");
467	hasSpecialMeaningPrefix =
468	hasSpecialMeaningPrefix \|\| kNumberedListRegexp.match(String: Content);
469
470	// Simple heuristic for what to reflow: content should contain at least two
471	// characters and either the first or second character must be
472	// non-punctuation.
473	return Content.size() >= `2` && !hasSpecialMeaningPrefix &&
474	!Content.ends_with(Suffix: "\\") &&
475	// Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
476	// true, then the first code point must be 1 byte long.
477	(!isPunctuation(c: Content [`0`]) \|\| !isPunctuation(c: Content [`1`]));
478	}
479
480	BreakableBlockComment::BreakableBlockComment(
481	const FormatToken &Token, unsigned StartColumn,
482	unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
483	encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
484	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style),
485	DelimitersOnNewline(false),
486	UnbreakableTailLength(Token.UnbreakableTailLength) {
487	assert(Tok.is(TT_BlockComment) &&
488	"block comment section must start with a block comment");
489
490	StringRef TokenText(Tok.TokenText);
491	assert(TokenText.starts_with("/") && TokenText.ends_with("/"));
492	TokenText.substr(Start: `2`, N: TokenText.size() - `4`)
493	.split(A&: Lines, Separator: UseCRLF ? "\r\n" : "\n");
494
495	int IndentDelta = StartColumn - OriginalStartColumn;
496	Content.resize(N: Lines.size());
497	Content [`0`] = Lines [`0`];
498	ContentColumn.resize(N: Lines.size());
499	// Account for the initial '/'.*
500	ContentColumn [`0`] = StartColumn + `2`;
501	Tokens.resize(N: Lines.size());
502	for (size_t i = `1`; i < Lines.size(); ++i)
503	adjustWhitespace(LineIndex: i, IndentDelta);
504
505	// Align decorations with the column of the star on the first line,
506	// that is one column after the start "/".*
507	DecorationColumn = StartColumn + `1`;
508
509	// Account for comment decoration patterns like this:
510	//
511	// /*
512	// * blah blah blah*
513	// /*
514	if (Lines.size() >= `2` && Content [`1`].starts_with(Prefix: "**") &&
515	static_cast<unsigned>(ContentColumn [`1`]) == StartColumn) {
516	DecorationColumn = StartColumn;
517	}
518
519	Decoration = "* ";
520	if (Lines.size() == `1` && !FirstInLine) {
521	// Comments for which FirstInLine is false can start on arbitrary column,
522	// and available horizontal space can be too small to align consecutive
523	// lines with the first one.
524	// FIXME: We could, probably, align them to current indentation level, but
525	// now we just wrap them without stars.
526	Decoration = "";
527	}
528	for (size_t i = `1`, e = Content.size(); i < e && !Decoration.empty(); ++i) {
529	const StringRef &Text = Content [i];
530	if (i + `1` == e) {
531	// If the last line is empty, the closing "/" will have a star.*
532	if (Text.empty())
533	break;
534	} else if (!Text.empty() && Decoration.starts_with(Prefix: Text)) {
535	continue;
536	}
537	while (!Text.starts_with(Prefix: Decoration))
538	Decoration = Decoration.drop_back(N: `1`);
539	}
540
541	LastLineNeedsDecoration = true;
542	IndentAtLineBreak = ContentColumn [`0`] + `1`;
543	for (size_t i = `1`, e = Lines.size(); i < e; ++i) {
544	if (Content [i].empty()) {
545	if (i + `1` == e) {
546	// Empty last line means that we already have a star as a part of the
547	// trailing /. We also need to preserve whitespace, so that / is
548	// correctly indented.
549	LastLineNeedsDecoration = false;
550	// Align the star in the last '/' with the stars on the previous lines.*
551	if (e >= `2` && !Decoration.empty())
552	ContentColumn [i] = DecorationColumn;
553	} else if (Decoration.empty()) {
554	// For all other lines, set the start column to 0 if they're empty, so
555	// we do not insert trailing whitespace anywhere.
556	ContentColumn [i] = `0`;
557	}
558	continue;
559	}
560
561	// The first line already excludes the star.
562	// The last line excludes the star if LastLineNeedsDecoration is false.
563	// For all other lines, adjust the line to exclude the star and
564	// (optionally) the first whitespace.
565	unsigned DecorationSize = Decoration.starts_with(Prefix: Content [i])
566	? Content [i].size()
567	: Decoration.size();
568	if (DecorationSize)
569	ContentColumn [i] = DecorationColumn + DecorationSize;
570	Content [i] = Content [i].substr(Start: DecorationSize);
571	if (!Decoration.starts_with(Prefix: Content [i])) {
572	IndentAtLineBreak =
573	std::min<int>(a: IndentAtLineBreak, b: std::max(a: `0`, b: ContentColumn [i]));
574	}
575	}
576	IndentAtLineBreak = std::max<unsigned>(a: IndentAtLineBreak, b: Decoration.size());
577
578	// Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
579	if (Style.isJavaScript() \|\| Style.isJava()) {
580	if ((Lines [`0`] == "" \|\| Lines [`0`].starts_with(Prefix: " ")) && Lines.size() > `1`) {
581	// This is a multiline jsdoc comment.
582	DelimitersOnNewline = true;
583	} else if (Lines [`0`].starts_with(Prefix: "* ") && Lines.size() == `1`) {
584	// Detect a long single-line comment, like:
585	// /* long long long /
586	// Below, '2' is the width of '/'.*
587	unsigned EndColumn =
588	ContentColumn [`0`] +
589	encoding::columnWidthWithTabs(Text: Lines [`0`], StartColumn: ContentColumn [`0`],
590	TabWidth: Style.TabWidth, Encoding) +
591	`2`;
592	DelimitersOnNewline = EndColumn > Style.ColumnLimit;
593	}
594	}
595
596	LLVM_DEBUG({
597	llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
598	llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
599	for (size_t i = `0`; i < Lines.size(); ++i) {
600	llvm::dbgs() << i << " \|" << Content[i] << "\| "
601	<< "CC=" << ContentColumn[i] << "\| "
602	<< "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
603	}
604	});
605	}
606
607	BreakableToken::Split BreakableBlockComment::getSplit(
608	unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609	unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
610	// Don't break lines matching the comment pragmas regex.
611	if (!AlwaysReflow \|\| CommentPragmasRegex.match(String: Content [LineIndex]))
612	return Split (StringRef::npos, `0`);
613	return getCommentSplit(Text: Content [LineIndex].substr(Start: TailOffset),
614	ContentStartColumn, ColumnLimit, TabWidth: Style.TabWidth,
615	Encoding, Style, DecorationEndsWithStar: Decoration.ends_with(Suffix: "*"));
616	}
617
618	void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619	int IndentDelta) {
620	// When in a preprocessor directive, the trailing backslash in a block comment
621	// is not needed, but can serve a purpose of uniformity with necessary escaped
622	// newlines outside the comment. In this case we remove it here before
623	// trimming the trailing whitespace. The backslash will be re-added later when
624	// inserting a line break.
625	size_t EndOfPreviousLine = Lines [LineIndex - `1`].size();
626	if (InPPDirective && Lines [LineIndex - `1`].ends_with(Suffix: "\\"))
627	--EndOfPreviousLine;
628
629	// Calculate the end of the non-whitespace text in the previous line.
630	EndOfPreviousLine =
631	Lines [LineIndex - `1`].find_last_not_of(Chars: Blanks, From: EndOfPreviousLine);
632	if (EndOfPreviousLine == StringRef::npos)
633	EndOfPreviousLine = `0`;
634	else
635	++EndOfPreviousLine;
636	// Calculate the start of the non-whitespace text in the current line.
637	size_t StartOfLine = Lines [LineIndex].find_first_not_of(Chars: Blanks);
638	if (StartOfLine == StringRef::npos)
639	StartOfLine = Lines [LineIndex].size();
640
641	StringRef Whitespace = Lines [LineIndex].substr(Start: `0`, N: StartOfLine);
642	// Adjust Lines to only contain relevant text.
643	size_t PreviousContentOffset =
644	Content [LineIndex - `1`].data() - Lines [LineIndex - `1`].data();
645	Content [LineIndex - `1`] = Lines [LineIndex - `1`].substr(
646	Start: PreviousContentOffset, N: EndOfPreviousLine - PreviousContentOffset);
647	Content [LineIndex] = Lines [LineIndex].substr(Start: StartOfLine);
648
649	// Adjust the start column uniformly across all lines.
650	ContentColumn [LineIndex] =
651	encoding::columnWidthWithTabs(Text: Whitespace, StartColumn: `0`, TabWidth: Style.TabWidth, Encoding) +
652	IndentDelta;
653	}
654
655	unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
656	unsigned Offset,
657	StringRef::size_type Length,
658	unsigned StartColumn) const {
659	return encoding::columnWidthWithTabs(
660	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
661	Encoding);
662	}
663
664	unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
665	unsigned Offset,
666	unsigned StartColumn) const {
667	unsigned LineLength =
668	UnbreakableTailLength +
669	getRangeLength(LineIndex, Offset, Length: StringRef::npos, StartColumn);
670	if (LineIndex + `1` == Lines.size()) {
671	LineLength += `2`;
672	// We never need a decoration when breaking just the trailing "/" postfix.*
673	bool HasRemainingText = Offset < Content [LineIndex].size();
674	if (!HasRemainingText) {
675	bool HasDecoration = Lines [LineIndex].ltrim().starts_with(Prefix: Decoration);
676	if (HasDecoration)
677	LineLength -= Decoration.size();
678	}
679	}
680	return LineLength;
681	}
682
683	unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
684	bool Break) const {
685	if (Break)
686	return IndentAtLineBreak;
687	return std::max(a: `0`, b: ContentColumn [LineIndex]);
688	}
689
690	const llvm::StringSet<>
691	BreakableBlockComment::ContentIndentingJavadocAnnotations = {
692	"@param", "@return", "@returns", "@throws", "@type", "@template",
693	"@see", "@deprecated", "@define", "@exports", "@mods", "@private",
694	};
695
696	unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
697	if (!Style.isJava() && !Style.isJavaScript())
698	return `0`;
699	// The content at LineIndex 0 of a comment like:
700	// /* line 0 /
701	// is " line 0", so we need to skip over the decoration in that case.*
702	StringRef ContentWithNoDecoration = Content [LineIndex];
703	if (LineIndex == `0` && ContentWithNoDecoration.starts_with(Prefix: "*"))
704	ContentWithNoDecoration = ContentWithNoDecoration.substr(Start: `1`).ltrim(Chars: Blanks);
705	StringRef FirstWord = ContentWithNoDecoration.substr(
706	Start: `0`, N: ContentWithNoDecoration.find_first_of(Chars: Blanks));
707	if (ContentIndentingJavadocAnnotations.contains(key: FirstWord))
708	return Style.ContinuationIndentWidth;
709	return `0`;
710	}
711
712	void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
713	Split Split, unsigned ContentIndent,
714	WhitespaceManager &Whitespaces) const {
715	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
716	StringRef Prefix = Decoration;
717	// We need this to account for the case when we have a decoration " " for all*
718	// the lines except for the last one, where the star in "/" acts as a*
719	// decoration.
720	unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721	if (LineIndex + `1` == Lines.size() &&
722	Text.size() == Split.first + Split.second) {
723	// For the last line we need to break before "/", but not to add "* ".*
724	Prefix = "";
725	if (LocalIndentAtLineBreak >= `2`)
726	LocalIndentAtLineBreak -= `2`;
727	}
728	// The split offset is from the beginning of the line. Convert it to an offset
729	// from the beginning of the token text.
730	unsigned BreakOffsetInToken =
731	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
732	unsigned CharsToRemove = Split.second;
733	assert(LocalIndentAtLineBreak >= Prefix.size());
734	std::string PrefixWithTrailingIndent = std::string (Prefix);
735	PrefixWithTrailingIndent.append(n: ContentIndent, c: `' '`);
736	Whitespaces.replaceWhitespaceInToken(
737	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
738	CurrentPrefix: PrefixWithTrailingIndent, InPPDirective, /Newlines=/`1`,
739	/Spaces=/LocalIndentAtLineBreak + ContentIndent -
740	PrefixWithTrailingIndent.size());
741	}
742
743	BreakableToken::Split BreakableBlockComment::getReflowSplit(
744	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
745	if (!mayReflow(LineIndex, CommentPragmasRegex))
746	return Split (StringRef::npos, `0`);
747
748	// If we're reflowing into a line with content indent, only reflow the next
749	// line if its starting whitespace matches the content indent.
750	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
751	if (LineIndex) {
752	unsigned PreviousContentIndent = getContentIndent(LineIndex: LineIndex - `1`);
753	if (PreviousContentIndent && Trimmed != StringRef::npos &&
754	Trimmed != PreviousContentIndent) {
755	return Split (StringRef::npos, `0`);
756	}
757	}
758
759	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
760	}
761
762	bool BreakableBlockComment::introducesBreakBeforeToken() const {
763	// A break is introduced when we want delimiters on newline.
764	return DelimitersOnNewline &&
765	Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks) != StringRef::npos;
766	}
767
768	void BreakableBlockComment::reflow(unsigned LineIndex,
769	WhitespaceManager &Whitespaces) const {
770	StringRef TrimmedContent = Content [LineIndex].ltrim(Chars: Blanks);
771	// Here we need to reflow.
772	assert(Tokens[LineIndex - `1`] == Tokens[LineIndex] &&
773	"Reflowing whitespace within a token");
774	// This is the offset of the end of the last line relative to the start of
775	// the token text in the token.
776	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
777	Content [LineIndex - `1`].size() -
778	tokenAt(LineIndex).TokenText.data();
779	unsigned WhitespaceLength = TrimmedContent.data() -
780	tokenAt(LineIndex).TokenText.data() -
781	WhitespaceOffsetInToken;
782	Whitespaces.replaceWhitespaceInToken(
783	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken,
784	/ReplaceChars=/WhitespaceLength, /PreviousPostfix=/"",
785	/CurrentPrefix=/ReflowPrefix, InPPDirective, /Newlines=/`0`,
786	/Spaces=/`0`);
787	}
788
789	void BreakableBlockComment::adaptStartOfLine(
790	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
791	if (LineIndex == `0`) {
792	if (DelimitersOnNewline) {
793	// Since we're breaking at index 1 below, the break position and the
794	// break length are the same.
795	// Note: this works because getCommentSplit is careful never to split at
796	// the beginning of a line.
797	size_t BreakLength = Lines [`0`].substr(Start: `1`).find_first_not_of(Chars: Blanks);
798	if (BreakLength != StringRef::npos) {
799	insertBreak(LineIndex, TailOffset: `0`, Split: Split (`1`, BreakLength), /ContentIndent=/`0`,
800	Whitespaces);
801	}
802	}
803	return;
804	}
805	// Here no reflow with the previous line will happen.
806	// Fix the decoration of the line at LineIndex.
807	StringRef Prefix = Decoration;
808	if (Content [LineIndex].empty()) {
809	if (LineIndex + `1` == Lines.size()) {
810	if (!LastLineNeedsDecoration) {
811	// If the last line was empty, we don't need a prefix, as the / will*
812	// line up with the decoration (if it exists).
813	Prefix = "";
814	}
815	} else if (!Decoration.empty()) {
816	// For other empty lines, if we do have a decoration, adapt it to not
817	// contain a trailing whitespace.
818	Prefix = Prefix.substr(Start: `0`, N: `1`);
819	}
820	} else if (ContentColumn [LineIndex] == `1`) {
821	// This line starts immediately after the decorating .*
822	Prefix = Prefix.substr(Start: `0`, N: `1`);
823	}
824	// This is the offset of the end of the last line relative to the start of the
825	// token text in the token.
826	unsigned WhitespaceOffsetInToken = Content [LineIndex - `1`].data() +
827	Content [LineIndex - `1`].size() -
828	tokenAt(LineIndex).TokenText.data();
829	unsigned WhitespaceLength = Content [LineIndex].data() -
830	tokenAt(LineIndex).TokenText.data() -
831	WhitespaceOffsetInToken;
832	Whitespaces.replaceWhitespaceInToken(
833	Tok: tokenAt(LineIndex), Offset: WhitespaceOffsetInToken, ReplaceChars: WhitespaceLength, PreviousPostfix: "", CurrentPrefix: Prefix,
834	InPPDirective, /Newlines=/`1`, Spaces: ContentColumn [LineIndex] - Prefix.size());
835	}
836
837	BreakableToken::Split
838	BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
839	if (DelimitersOnNewline) {
840	// Replace the trailing whitespace of the last line with a newline.
841	// In case the last line is empty, the ending '/' is already on its own*
842	// line.
843	StringRef Line = Content.back().substr(Start: TailOffset);
844	StringRef TrimmedLine = Line.rtrim(Chars: Blanks);
845	if (!TrimmedLine.empty())
846	return Split (TrimmedLine.size(), Line.size() - TrimmedLine.size());
847	}
848	return Split (StringRef::npos, `0`);
849	}
850
851	bool BreakableBlockComment::mayReflow(
852	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
853	// Content[LineIndex] may exclude the indent after the '' decoration. In that*
854	// case, we compute the start of the comment pragma manually.
855	StringRef IndentContent = Content [LineIndex];
856	if (Lines [LineIndex].ltrim(Chars: Blanks).starts_with(Prefix: "*"))
857	IndentContent = Lines [LineIndex].ltrim(Chars: Blanks).substr(Start: `1`);
858	return LineIndex > `0` && AlwaysReflow &&
859	!CommentPragmasRegex.match(String: IndentContent) &&
860	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
861	!switchesFormatting(Token: tokenAt(LineIndex));
862	}
863
864	BreakableLineCommentSection::BreakableLineCommentSection(
865	const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
866	encoding::Encoding Encoding, const FormatStyle &Style)
867	: BreakableComment (Token, StartColumn, InPPDirective, Encoding, Style) {
868	assert(Tok.is(TT_LineComment) &&
869	"line comment section must start with a line comment");
870	FormatToken LineTok = nullptr*;
871	const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
872	// How many spaces we changed in the first line of the section, this will be
873	// applied in all following lines
874	int FirstLineSpaceChange = `0`;
875	for (const FormatToken *CurrentTok = &Tok;
876	CurrentTok && CurrentTok->is(TT: TT_LineComment);
877	CurrentTok = CurrentTok->Next) {
878	LastLineTok = LineTok;
879	StringRef TokenText(CurrentTok->TokenText);
880	assert((TokenText.starts_with("//") \|\| TokenText.starts_with("#")) &&
881	"unsupported line comment prefix, '//' and '#' are supported");
882	size_t FirstLineIndex = Lines.size();
883	TokenText.split(A&: Lines, Separator: "\n");
884	Content.resize(N: Lines.size());
885	ContentColumn.resize(N: Lines.size());
886	PrefixSpaceChange.resize(N: Lines.size());
887	Tokens.resize(N: Lines.size());
888	Prefix.resize(N: Lines.size());
889	OriginalPrefix.resize(N: Lines.size());
890	for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
891	Lines [i] = Lines [i].ltrim(Chars: Blanks);
892	StringRef IndentPrefix = getLineCommentIndentPrefix(Comment: Lines [i], Style);
893	OriginalPrefix [i] = IndentPrefix;
894	const int SpacesInPrefix = llvm::count(Range&: IndentPrefix, Element: `' '`);
895
896	// This lambda also considers multibyte character that is not handled in
897	// functions like isPunctuation provided by CharInfo.
898	const auto NoSpaceBeforeFirstCommentChar = [&]() {
899	assert(Lines[i].size() > IndentPrefix.size());
900	const char FirstCommentChar = Lines [i][IndentPrefix.size()];
901	const unsigned FirstCharByteSize =
902	encoding::getCodePointNumBytes(FirstChar: FirstCommentChar, Encoding);
903	if (encoding::columnWidth(
904	Text: Lines [i].substr(Start: IndentPrefix.size(), N: FirstCharByteSize),
905	Encoding) != `1`) {
906	return false;
907	}
908	// In C-like comments, add a space before #. For example this is useful
909	// to preserve the relative indentation when commenting out code with
910	// #includes.
911	//
912	// In languages using # as the comment leader such as proto, don't
913	// add a space to support patterns like:
914	// #########
915	// # section
916	// #########
917	if (FirstCommentChar == `'#'` && !TokenText.starts_with(Prefix: "#"))
918	return false;
919	return FirstCommentChar == `'\\'` \|\| isPunctuation(c: FirstCommentChar) \|\|
920	isHorizontalWhitespace(c: FirstCommentChar);
921	};
922
923	// On the first line of the comment section we calculate how many spaces
924	// are to be added or removed, all lines after that just get only the
925	// change and we will not look at the maximum anymore. Additionally to the
926	// actual first line, we calculate that when the non space Prefix changes,
927	// e.g. from "///" to "//".
928	if (i == `0` \|\| OriginalPrefix [i].rtrim(Chars: Blanks) !=
929	OriginalPrefix [i - `1`].rtrim(Chars: Blanks)) {
930	if (SpacesInPrefix < Minimum && Lines [i].size() > IndentPrefix.size() &&
931	!NoSpaceBeforeFirstCommentChar ()) {
932	FirstLineSpaceChange = Minimum - SpacesInPrefix;
933	} else if (static_cast<unsigned>(SpacesInPrefix) >
934	Style.SpacesInLineCommentPrefix.Maximum) {
935	FirstLineSpaceChange =
936	Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
937	} else {
938	FirstLineSpaceChange = `0`;
939	}
940	}
941
942	if (Lines [i].size() != IndentPrefix.size()) {
943	PrefixSpaceChange [i] = FirstLineSpaceChange;
944
945	if (SpacesInPrefix + PrefixSpaceChange [i] < Minimum) {
946	PrefixSpaceChange [i] +=
947	Minimum - (SpacesInPrefix + PrefixSpaceChange [i]);
948	}
949
950	assert(Lines[i].size() > IndentPrefix.size());
951	const auto FirstNonSpace = Lines [i][IndentPrefix.size()];
952	const bool IsFormatComment = LineTok && switchesFormatting(Token: *LineTok);
953	const bool LineRequiresLeadingSpace =
954	!NoSpaceBeforeFirstCommentChar () \|\|
955	(FirstNonSpace == `'}'` && FirstLineSpaceChange != `0`);
956	const bool AllowsSpaceChange =
957	!IsFormatComment &&
958	(SpacesInPrefix != `0` \|\| LineRequiresLeadingSpace);
959
960	if (PrefixSpaceChange [i] > `0` && AllowsSpaceChange) {
961	Prefix [i] = IndentPrefix.str();
962	Prefix [i].append(n: PrefixSpaceChange [i], c: `' '`);
963	} else if (PrefixSpaceChange [i] < `0` && AllowsSpaceChange) {
964	Prefix [i] = IndentPrefix
965	.drop_back(N: std::min<std::size_t>(
966	a: -PrefixSpaceChange [i], b: SpacesInPrefix))
967	.str();
968	} else {
969	Prefix [i] = IndentPrefix.str();
970	}
971	} else {
972	// If the IndentPrefix is the whole line, there is no content and we
973	// drop just all space
974	Prefix [i] = IndentPrefix.drop_back(N: SpacesInPrefix).str();
975	}
976
977	Tokens [i] = LineTok;
978	Content [i] = Lines [i].substr(Start: IndentPrefix.size());
979	ContentColumn [i] =
980	StartColumn + encoding::columnWidthWithTabs(Text: Prefix [i], StartColumn,
981	TabWidth: Style.TabWidth, Encoding);
982
983	// Calculate the end of the non-whitespace text in this line.
984	size_t EndOfLine = Content [i].find_last_not_of(Chars: Blanks);
985	if (EndOfLine == StringRef::npos)
986	EndOfLine = Content [i].size();
987	else
988	++EndOfLine;
989	Content [i] = Content [i].substr(Start: `0`, N: EndOfLine);
990	}
991	LineTok = CurrentTok->Next;
992	if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
993	// A line comment section needs to broken by a line comment that is
994	// preceded by at least two newlines. Note that we put this break here
995	// instead of breaking at a previous stage during parsing, since that
996	// would split the contents of the enum into two unwrapped lines in this
997	// example, which is undesirable:
998	// enum A {
999	// a, // comment about a
1000	//
1001	// // comment about b
1002	// b
1003	// };
1004	//
1005	// FIXME: Consider putting separate line comment sections as children to
1006	// the unwrapped line instead.
1007	break;
1008	}
1009	}
1010	}
1011
1012	unsigned
1013	BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1014	StringRef::size_type Length,
1015	unsigned StartColumn) const {
1016	return encoding::columnWidthWithTabs(
1017	Text: Content [LineIndex].substr(Start: Offset, N: Length), StartColumn, TabWidth: Style.TabWidth,
1018	Encoding);
1019	}
1020
1021	unsigned
1022	BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
1023	bool /Break/) const {
1024	return ContentColumn [LineIndex];
1025	}
1026
1027	void BreakableLineCommentSection::insertBreak(
1028	unsigned LineIndex, unsigned TailOffset, Split Split,
1029	unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1030	StringRef Text = Content [LineIndex].substr(Start: TailOffset);
1031	// Compute the offset of the split relative to the beginning of the token
1032	// text.
1033	unsigned BreakOffsetInToken =
1034	Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1035	unsigned CharsToRemove = Split.second;
1036	Whitespaces.replaceWhitespaceInToken(
1037	Tok: tokenAt(LineIndex), Offset: BreakOffsetInToken, ReplaceChars: CharsToRemove, PreviousPostfix: "",
1038	CurrentPrefix: Prefix [LineIndex], InPPDirective, /Newlines=/`1`,
1039	/Spaces=/ContentColumn [LineIndex] - Prefix [LineIndex].size());
1040	}
1041
1042	BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
1043	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1044	if (!mayReflow(LineIndex, CommentPragmasRegex))
1045	return Split (StringRef::npos, `0`);
1046
1047	size_t Trimmed = Content [LineIndex].find_first_not_of(Chars: Blanks);
1048
1049	// In a line comment section each line is a separate token; thus, after a
1050	// split we replace all whitespace before the current line comment token
1051	// (which does not need to be included in the split), plus the start of the
1052	// line up to where the content starts.
1053	return Split (`0`, Trimmed != StringRef::npos ? Trimmed : `0`);
1054	}
1055
1056	void BreakableLineCommentSection::reflow(unsigned LineIndex,
1057	WhitespaceManager &Whitespaces) const {
1058	if (LineIndex > `0` && Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1059	// Reflow happens between tokens. Replace the whitespace between the
1060	// tokens by the empty string.
1061	Whitespaces.replaceWhitespace(
1062	Tok&: Tokens [LineIndex], /Newlines=/`0`, /Spaces=/*`0`,
1063	/StartOfTokenColumn=/StartColumn, /IsAligned=/true,
1064	/InPPDirective=/false);
1065	} else if (LineIndex > `0`) {
1066	// In case we're reflowing after the '\' in:
1067	//
1068	// // line comment \
1069	// // line 2
1070	//
1071	// the reflow happens inside the single comment token (it is a single line
1072	// comment with an unescaped newline).
1073	// Replace the whitespace between the '\' and '//' with the empty string.
1074	//
1075	// Offset points to after the '\' relative to start of the token.
1076	unsigned Offset = Lines [LineIndex - `1`].data() +
1077	Lines [LineIndex - `1`].size() -
1078	tokenAt(LineIndex: LineIndex - `1`).TokenText.data();
1079	// WhitespaceLength is the number of chars between the '\' and the '//' on
1080	// the next line.
1081	unsigned WhitespaceLength =
1082	Lines [LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1083	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1084	/ReplaceChars=/WhitespaceLength,
1085	/PreviousPostfix=/"",
1086	/CurrentPrefix=/"",
1087	/InPPDirective=/false,
1088	/Newlines=/`0`,
1089	/Spaces=/`0`);
1090	}
1091	// Replace the indent and prefix of the token with the reflow prefix.
1092	unsigned Offset =
1093	Lines [LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1094	unsigned WhitespaceLength =
1095	Content [LineIndex].data() - Lines [LineIndex].data();
1096	Whitespaces.replaceWhitespaceInToken(Tok: *Tokens [LineIndex], Offset,
1097	/ReplaceChars=/WhitespaceLength,
1098	/PreviousPostfix=/"",
1099	/CurrentPrefix=/ReflowPrefix,
1100	/InPPDirective=/false,
1101	/Newlines=/`0`,
1102	/Spaces=/`0`);
1103	}
1104
1105	void BreakableLineCommentSection::adaptStartOfLine(
1106	unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1107	// If this is the first line of a token, we need to inform Whitespace Manager
1108	// about it: either adapt the whitespace range preceding it, or mark it as an
1109	// untouchable token.
1110	// This happens for instance here:
1111	// // line 1 \
1112	// // line 2
1113	if (LineIndex > `0` && Tokens [LineIndex] != Tokens [LineIndex - `1`]) {
1114	// This is the first line for the current token, but no reflow with the
1115	// previous token is necessary. However, we still may need to adjust the
1116	// start column. Note that ContentColumn[LineIndex] is the expected
1117	// content column after a possible update to the prefix, hence the prefix
1118	// length change is included.
1119	unsigned LineColumn =
1120	ContentColumn [LineIndex] -
1121	(Content [LineIndex].data() - Lines [LineIndex].data()) +
1122	(OriginalPrefix [LineIndex].size() - Prefix [LineIndex].size());
1123
1124	// We always want to create a replacement instead of adding an untouchable
1125	// token, even if LineColumn is the same as the original column of the
1126	// token. This is because WhitespaceManager doesn't align trailing
1127	// comments if they are untouchable.
1128	Whitespaces.replaceWhitespace(Tok&: *Tokens [LineIndex],
1129	/Newlines=/`1`,
1130	/Spaces=/LineColumn,
1131	/StartOfTokenColumn=/LineColumn,
1132	/IsAligned=/true,
1133	/InPPDirective=/false);
1134	}
1135	if (OriginalPrefix [LineIndex] != Prefix [LineIndex]) {
1136	// Adjust the prefix if necessary.
1137	const auto SpacesToRemove = -std::min(a: PrefixSpaceChange [LineIndex], b: `0`);
1138	const auto SpacesToAdd = std::max(a: PrefixSpaceChange [LineIndex], b: `0`);
1139	Whitespaces.replaceWhitespaceInToken(
1140	Tok: tokenAt(LineIndex), Offset: OriginalPrefix [LineIndex].size() - SpacesToRemove,
1141	/ReplaceChars=/SpacesToRemove, PreviousPostfix: "", CurrentPrefix: "", /InPPDirective=/false,
1142	/Newlines=/`0`, /Spaces=/SpacesToAdd);
1143	}
1144	}
1145
1146	void BreakableLineCommentSection::updateNextToken(LineState &State) const {
1147	if (LastLineTok)
1148	State.NextToken = LastLineTok->Next;
1149	}
1150
1151	bool BreakableLineCommentSection::mayReflow(
1152	unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1153	// Line comments have the indent as part of the prefix, so we need to
1154	// recompute the start of the line.
1155	StringRef IndentContent = Content [LineIndex];
1156	if (Lines [LineIndex].starts_with(Prefix: "//"))
1157	IndentContent = Lines [LineIndex].substr(Start: `2`);
1158	// FIXME: Decide whether we want to reflow non-regular indents:
1159	// Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1160	// OriginalPrefix[LineIndex-1]. That means we don't reflow
1161	// // text that protrudes
1162	// // into text with different indent
1163	// We do reflow in that case in block comments.
1164	return LineIndex > `0` && AlwaysReflow &&
1165	!CommentPragmasRegex.match(String: IndentContent) &&
1166	mayReflowContent(Content: Content [LineIndex]) && !Tok.Finalized &&
1167	!switchesFormatting(Token: tokenAt(LineIndex)) &&
1168	OriginalPrefix [LineIndex] == OriginalPrefix [LineIndex - `1`];
1169	}
1170
1171	} // namespace format
1172	} // namespace clang
1173

source code of clang/lib/Format/BreakableToken.cpp