1 | /* |
2 | SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com> |
3 | |
4 | SPDX-License-Identifier: LGPL-2.0-or-later |
5 | */ |
6 | #include "kateindentdetecter.h" |
7 | |
8 | #include "katedocument.h" |
9 | |
10 | KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc) |
11 | : m_doc(doc) |
12 | { |
13 | } |
14 | |
/**
 * Outcome of comparing the indentation of two lines.
 */
struct SpacesDiffResult {
    /// Indentation difference expressed in spaces; stays 0 when no difference was determined.
    int spacesDiff{0};
    /// Set when the difference looks like manual alignment rather than real indentation.
    bool looksLikeAlignment{false};
};
19 | |
20 | static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength) |
21 | { |
22 | SpacesDiffResult result; |
23 | result.spacesDiff = 0; |
24 | result.looksLikeAlignment = false; |
25 | |
26 | // This can go both ways (e.g.): |
27 | // - a: "\t" |
28 | // - b: "\t " |
29 | // => This should count 1 tab and 4 spaces |
30 | |
31 | int i = 0; |
32 | |
33 | for (i = 0; i < aLength && i < bLength; i++) { |
34 | const auto aCharCode = a.at(i); |
35 | const auto bCharCode = b.at(i); |
36 | |
37 | if (aCharCode != bCharCode) { |
38 | break; |
39 | } |
40 | } |
41 | |
42 | int aSpacesCnt = 0; |
43 | int aTabsCount = 0; |
44 | for (int j = i; j < aLength; j++) { |
45 | const auto aCharCode = a.at(i: j); |
46 | if (aCharCode == QLatin1Char(' ')) { |
47 | aSpacesCnt++; |
48 | } else { |
49 | aTabsCount++; |
50 | } |
51 | } |
52 | |
53 | int bSpacesCnt = 0; |
54 | int bTabsCount = 0; |
55 | for (int j = i; j < bLength; j++) { |
56 | const auto bCharCode = b.at(i: j); |
57 | if (bCharCode == QLatin1Char(' ')) { |
58 | bSpacesCnt++; |
59 | } else { |
60 | bTabsCount++; |
61 | } |
62 | } |
63 | |
64 | if (aSpacesCnt > 0 && aTabsCount > 0) { |
65 | return result; |
66 | } |
67 | if (bSpacesCnt > 0 && bTabsCount > 0) { |
68 | return result; |
69 | } |
70 | |
71 | const auto tabsDiff = std::abs(x: aTabsCount - bTabsCount); |
72 | const auto spacesDiff = std::abs(x: aSpacesCnt - bSpacesCnt); |
73 | |
74 | if (tabsDiff == 0) { |
75 | // check if the indentation difference might be caused by alignment reasons |
76 | // sometime folks like to align their code, but this should not be used as a hint |
77 | result.spacesDiff = spacesDiff; |
78 | |
79 | if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) { |
80 | if (b.at(i: bSpacesCnt) != QLatin1Char(' ') && a.at(i: bSpacesCnt - 1) == QLatin1Char(' ')) { |
81 | if (a.at(i: a.length() - 1) == QLatin1Char(',')) { |
82 | // This looks like an alignment desire: e.g. |
83 | // const a = b + c, |
84 | // d = b - c; |
85 | result.looksLikeAlignment = true; |
86 | } |
87 | } |
88 | } |
89 | return result; |
90 | } |
91 | if (spacesDiff % tabsDiff == 0) { |
92 | result.spacesDiff = spacesDiff / tabsDiff; |
93 | return result; |
94 | } |
95 | return result; |
96 | } |
97 | |
98 | KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces) |
99 | { |
100 | // Look at most at the first 10k lines |
101 | const int linesCount = std::min(a: m_doc->lines(), b: 10000); |
102 | |
103 | int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation |
104 | int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation |
105 | |
106 | QString previousLineText; // content of latest line that contained non-whitespace chars |
107 | int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char |
108 | |
109 | constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8]. |
110 | constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8 |
111 | |
112 | int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores |
113 | SpacesDiffResult tmp; |
114 | |
115 | for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) { |
116 | const QString currentLineText = m_doc->line(line: lineNumber); |
117 | const int currentLineLength = currentLineText.length(); |
118 | |
119 | bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars |
120 | int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char |
121 | int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation |
122 | int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation |
123 | for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) { |
124 | const auto charCode = currentLineText.at(i: j); |
125 | |
126 | if (charCode == QLatin1Char('\t')) { |
127 | currentLineTabsCount++; |
128 | } else if (charCode == QLatin1Char(' ')) { |
129 | currentLineSpacesCount++; |
130 | } else { |
131 | // Hit non whitespace character on this line |
132 | currentLineHasContent = true; |
133 | currentLineIndentation = j; |
134 | break; |
135 | } |
136 | } |
137 | |
138 | // Ignore empty or only whitespace lines |
139 | if (!currentLineHasContent) { |
140 | continue; |
141 | } |
142 | |
143 | if (currentLineTabsCount > 0) { |
144 | linesIndentedWithTabsCount++; |
145 | } else if (currentLineSpacesCount > 1) { |
146 | linesIndentedWithSpacesCount++; |
147 | } |
148 | |
149 | tmp = spacesDiff(a: previousLineText, aLength: previousLineIndentation, b: currentLineText, bLength: currentLineIndentation); |
150 | |
151 | if (tmp.looksLikeAlignment) { |
152 | // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation |
153 | // |
154 | // - item1 |
155 | // - item2 |
156 | // |
157 | // otherwise skip this line entirely |
158 | // |
159 | // const a = 1, |
160 | // b = 2; |
161 | |
162 | if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) { |
163 | continue; |
164 | } |
165 | } |
166 | |
167 | const int currentSpacesDiff = tmp.spacesDiff; |
168 | if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) { |
169 | spacesDiffCount[currentSpacesDiff]++; |
170 | } |
171 | |
172 | previousLineText = currentLineText; |
173 | previousLineIndentation = currentLineIndentation; |
174 | } |
175 | |
176 | bool insertSpaces = defaultInsertSpaces; |
177 | if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) { |
178 | insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount); |
179 | } |
180 | |
181 | int tabSize = defaultTabSize; |
182 | |
183 | // Guess tabSize only if inserting spaces... |
184 | if (insertSpaces) { |
185 | int tabSizeScore = 0; |
186 | for (int i = 0; i < 7; ++i) { |
187 | int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i]; |
188 | const int possibleTabSizeScore = spacesDiffCount[possibleTabSize]; |
189 | if (possibleTabSizeScore > tabSizeScore) { |
190 | tabSizeScore = possibleTabSizeScore; |
191 | tabSize = possibleTabSize; |
192 | } |
193 | } |
194 | |
195 | // Let a tabSize of 2 win even if it is not the maximum |
196 | // (only in case 4 was guessed) |
197 | if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) { |
198 | tabSize = 2; |
199 | } |
200 | |
201 | // If no indent detected, check if the file is 1 space indented |
202 | if (tabSizeScore == 0) { |
203 | const auto it = std::max_element(first: spacesDiffCount, last: spacesDiffCount + 9); |
204 | const auto maxIdx = std::distance(first: spacesDiffCount, last: it); |
205 | if (maxIdx == 1) { |
206 | tabSize = 1; |
207 | } |
208 | } |
209 | } |
210 | |
211 | return {.indentWidth: tabSize, .indentUsingSpaces: insertSpaces}; |
212 | } |
213 | |