1 | //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
/// This file implements IntegerLiteralSeparatorFixer that fixes integer
/// literal separators in C++, Objective-C, C#, Java, and JavaScript code.
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "IntegerLiteralSeparatorFixer.h" |
16 | |
17 | namespace clang { |
18 | namespace format { |
19 | |
/// Numeric base of an integer literal, as classified by getBase().
enum class Base {
  Binary,  ///< Literal starts with "0b" or "0B".
  Decimal, ///< Literal starts with a nonzero digit.
  Hex,     ///< Literal starts with "0x" or "0X".
  Other    ///< Literal starts with '0' followed by anything else.
};
21 | |
22 | static Base getBase(const StringRef IntegerLiteral) { |
23 | assert(IntegerLiteral.size() > 1); |
24 | |
25 | if (IntegerLiteral[0] > '0') { |
26 | assert(IntegerLiteral[0] <= '9'); |
27 | return Base::Decimal; |
28 | } |
29 | |
30 | assert(IntegerLiteral[0] == '0'); |
31 | |
32 | switch (IntegerLiteral[1]) { |
33 | case 'b': |
34 | case 'B': |
35 | return Base::Binary; |
36 | case 'x': |
37 | case 'X': |
38 | return Base::Hex; |
39 | default: |
40 | return Base::Other; |
41 | } |
42 | } |
43 | |
44 | std::pair<tooling::Replacements, unsigned> |
45 | IntegerLiteralSeparatorFixer::process(const Environment &Env, |
46 | const FormatStyle &Style) { |
47 | switch (Style.Language) { |
48 | case FormatStyle::LK_Cpp: |
49 | case FormatStyle::LK_ObjC: |
50 | Separator = '\''; |
51 | break; |
52 | case FormatStyle::LK_CSharp: |
53 | case FormatStyle::LK_Java: |
54 | case FormatStyle::LK_JavaScript: |
55 | Separator = '_'; |
56 | break; |
57 | default: |
58 | return {}; |
59 | } |
60 | |
61 | const auto &Option = Style.IntegerLiteralSeparator; |
62 | const auto Binary = Option.Binary; |
63 | const auto Decimal = Option.Decimal; |
64 | const auto Hex = Option.Hex; |
65 | const bool SkipBinary = Binary == 0; |
66 | const bool SkipDecimal = Decimal == 0; |
67 | const bool SkipHex = Hex == 0; |
68 | |
69 | if (SkipBinary && SkipDecimal && SkipHex) |
70 | return {}; |
71 | |
72 | const auto BinaryMinDigits = |
73 | std::max(a: (int)Option.BinaryMinDigits, b: Binary + 1); |
74 | const auto DecimalMinDigits = |
75 | std::max(a: (int)Option.DecimalMinDigits, b: Decimal + 1); |
76 | const auto HexMinDigits = std::max(a: (int)Option.HexMinDigits, b: Hex + 1); |
77 | |
78 | const auto &SourceMgr = Env.getSourceManager(); |
79 | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); |
80 | |
81 | const auto ID = Env.getFileID(); |
82 | const auto LangOpts = getFormattingLangOpts(Style); |
83 | Lexer Lex(ID, SourceMgr.getBufferOrFake(FID: ID), SourceMgr, LangOpts); |
84 | Lex.SetCommentRetentionState(true); |
85 | |
86 | Token Tok; |
87 | tooling::Replacements Result; |
88 | |
89 | for (bool Skip = false; !Lex.LexFromRawLexer(Result&: Tok);) { |
90 | auto Length = Tok.getLength(); |
91 | if (Length < 2) |
92 | continue; |
93 | auto Location = Tok.getLocation(); |
94 | auto Text = StringRef(SourceMgr.getCharacterData(SL: Location), Length); |
95 | if (Tok.is(K: tok::comment)) { |
96 | if (isClangFormatOff(Comment: Text)) |
97 | Skip = true; |
98 | else if (isClangFormatOn(Comment: Text)) |
99 | Skip = false; |
100 | continue; |
101 | } |
102 | if (Skip || Tok.isNot(K: tok::numeric_constant) || Text[0] == '.' || |
103 | !AffectedRangeMgr.affectsCharSourceRange( |
104 | Range: CharSourceRange::getCharRange(B: Location, E: Tok.getEndLoc()))) { |
105 | continue; |
106 | } |
107 | const auto B = getBase(IntegerLiteral: Text); |
108 | const bool IsBase2 = B == Base::Binary; |
109 | const bool IsBase10 = B == Base::Decimal; |
110 | const bool IsBase16 = B == Base::Hex; |
111 | if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || |
112 | (IsBase16 && SkipHex) || B == Base::Other) { |
113 | continue; |
114 | } |
115 | if (Style.isCpp()) { |
116 | // Hex alpha digits a-f/A-F must be at the end of the string literal. |
117 | StringRef Suffixes = "_himnsuyd" ; |
118 | if (const auto Pos = |
119 | Text.find_first_of(Chars: IsBase16 ? Suffixes.drop_back() : Suffixes); |
120 | Pos != StringRef::npos) { |
121 | Text = Text.substr(Start: 0, N: Pos); |
122 | Length = Pos; |
123 | } |
124 | } |
125 | if ((IsBase10 && Text.find_last_of(Chars: ".eEfFdDmM" ) != StringRef::npos) || |
126 | (IsBase16 && Text.find_last_of(Chars: ".pP" ) != StringRef::npos)) { |
127 | continue; |
128 | } |
129 | const auto Start = Text[0] == '0' ? 2 : 0; |
130 | auto End = Text.find_first_of(Chars: "uUlLzZn" , From: Start); |
131 | if (End == StringRef::npos) |
132 | End = Length; |
133 | if (Start > 0 || End < Length) { |
134 | Length = End - Start; |
135 | Text = Text.substr(Start, N: Length); |
136 | } |
137 | auto DigitsPerGroup = Decimal; |
138 | auto MinDigits = DecimalMinDigits; |
139 | if (IsBase2) { |
140 | DigitsPerGroup = Binary; |
141 | MinDigits = BinaryMinDigits; |
142 | } else if (IsBase16) { |
143 | DigitsPerGroup = Hex; |
144 | MinDigits = HexMinDigits; |
145 | } |
146 | const auto SeparatorCount = Text.count(C: Separator); |
147 | const int DigitCount = Length - SeparatorCount; |
148 | const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; |
149 | if (RemoveSeparator && SeparatorCount == 0) |
150 | continue; |
151 | if (!RemoveSeparator && SeparatorCount > 0 && |
152 | checkSeparator(IntegerLiteral: Text, DigitsPerGroup)) { |
153 | continue; |
154 | } |
155 | const auto &Formatted = |
156 | format(IntegerLiteral: Text, DigitsPerGroup, DigitCount, RemoveSeparator); |
157 | assert(Formatted != Text); |
158 | if (Start > 0) |
159 | Location = Location.getLocWithOffset(Offset: Start); |
160 | cantFail(Err: Result.add( |
161 | R: tooling::Replacement(SourceMgr, Location, Length, Formatted))); |
162 | } |
163 | |
164 | return {Result, 0}; |
165 | } |
166 | |
167 | bool IntegerLiteralSeparatorFixer::checkSeparator( |
168 | const StringRef IntegerLiteral, int DigitsPerGroup) const { |
169 | assert(DigitsPerGroup > 0); |
170 | |
171 | int I = 0; |
172 | for (auto C : llvm::reverse(C: IntegerLiteral)) { |
173 | if (C == Separator) { |
174 | if (I < DigitsPerGroup) |
175 | return false; |
176 | I = 0; |
177 | } else { |
178 | if (I == DigitsPerGroup) |
179 | return false; |
180 | ++I; |
181 | } |
182 | } |
183 | |
184 | return true; |
185 | } |
186 | |
187 | std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, |
188 | int DigitsPerGroup, |
189 | int DigitCount, |
190 | bool RemoveSeparator) const { |
191 | assert(DigitsPerGroup != 0); |
192 | |
193 | std::string Formatted; |
194 | |
195 | if (RemoveSeparator) { |
196 | for (auto C : IntegerLiteral) |
197 | if (C != Separator) |
198 | Formatted.push_back(c: C); |
199 | return Formatted; |
200 | } |
201 | |
202 | int Remainder = DigitCount % DigitsPerGroup; |
203 | |
204 | int I = 0; |
205 | for (auto C : IntegerLiteral) { |
206 | if (C == Separator) |
207 | continue; |
208 | if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { |
209 | Formatted.push_back(c: Separator); |
210 | I = 0; |
211 | Remainder = 0; |
212 | } |
213 | Formatted.push_back(c: C); |
214 | ++I; |
215 | } |
216 | |
217 | return Formatted; |
218 | } |
219 | |
220 | } // namespace format |
221 | } // namespace clang |
222 | |