1/*
2 SPDX-FileCopyrightText: 2022 Waqar Ahmed <waqar.17a@gmail.com>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6#include "kateindentdetecter.h"
7
8#include "katedocument.h"
9
10KateIndentDetecter::KateIndentDetecter(KTextEditor::DocumentPrivate *doc)
11 : m_doc(doc)
12{
13}
14
/**
 * Outcome of comparing the indentation of two lines.
 */
struct SpacesDiffResult {
    /// Indentation step between the two lines; 0 when none could be derived.
    int spacesDiff{0};
    /// True when the difference looks like manual alignment rather than an indentation level.
    bool looksLikeAlignment{false};
};
19
20static SpacesDiffResult spacesDiff(const QString &a, int aLength, const QString &b, int bLength)
21{
22 SpacesDiffResult result;
23 result.spacesDiff = 0;
24 result.looksLikeAlignment = false;
25
26 // This can go both ways (e.g.):
27 // - a: "\t"
28 // - b: "\t "
29 // => This should count 1 tab and 4 spaces
30
31 int i = 0;
32
33 for (i = 0; i < aLength && i < bLength; i++) {
34 const auto aCharCode = a.at(i);
35 const auto bCharCode = b.at(i);
36
37 if (aCharCode != bCharCode) {
38 break;
39 }
40 }
41
42 int aSpacesCnt = 0;
43 int aTabsCount = 0;
44 for (int j = i; j < aLength; j++) {
45 const auto aCharCode = a.at(i: j);
46 if (aCharCode == QLatin1Char(' ')) {
47 aSpacesCnt++;
48 } else {
49 aTabsCount++;
50 }
51 }
52
53 int bSpacesCnt = 0;
54 int bTabsCount = 0;
55 for (int j = i; j < bLength; j++) {
56 const auto bCharCode = b.at(i: j);
57 if (bCharCode == QLatin1Char(' ')) {
58 bSpacesCnt++;
59 } else {
60 bTabsCount++;
61 }
62 }
63
64 if (aSpacesCnt > 0 && aTabsCount > 0) {
65 return result;
66 }
67 if (bSpacesCnt > 0 && bTabsCount > 0) {
68 return result;
69 }
70
71 const auto tabsDiff = std::abs(x: aTabsCount - bTabsCount);
72 const auto spacesDiff = std::abs(x: aSpacesCnt - bSpacesCnt);
73
74 if (tabsDiff == 0) {
75 // check if the indentation difference might be caused by alignment reasons
76 // sometime folks like to align their code, but this should not be used as a hint
77 result.spacesDiff = spacesDiff;
78
79 if (spacesDiff > 0 && 0 <= bSpacesCnt - 1 && bSpacesCnt - 1 < a.length() && bSpacesCnt < b.length()) {
80 if (b.at(i: bSpacesCnt) != QLatin1Char(' ') && a.at(i: bSpacesCnt - 1) == QLatin1Char(' ')) {
81 if (a.at(i: a.length() - 1) == QLatin1Char(',')) {
82 // This looks like an alignment desire: e.g.
83 // const a = b + c,
84 // d = b - c;
85 result.looksLikeAlignment = true;
86 }
87 }
88 }
89 return result;
90 }
91 if (spacesDiff % tabsDiff == 0) {
92 result.spacesDiff = spacesDiff / tabsDiff;
93 return result;
94 }
95 return result;
96}
97
98KateIndentDetecter::Result KateIndentDetecter::detect(int defaultTabSize, bool defaultInsertSpaces)
99{
100 // Look at most at the first 10k lines
101 const int linesCount = std::min(a: m_doc->lines(), b: 10000);
102
103 int linesIndentedWithTabsCount = 0; // number of lines that contain at least one tab in indentation
104 int linesIndentedWithSpacesCount = 0; // number of lines that contain only spaces in indentation
105
106 QString previousLineText; // content of latest line that contained non-whitespace chars
107 int previousLineIndentation = 0; // index at which latest line contained the first non-whitespace char
108
109 constexpr int ALLOWED_TAB_SIZE_GUESSES[7] = {2, 4, 6, 8, 3, 5, 7}; // prefer even guesses for `tabSize`, limit to [2, 8].
110 constexpr int MAX_ALLOWED_TAB_SIZE_GUESS = 8; // max(ALLOWED_TAB_SIZE_GUESSES) = 8
111
112 int spacesDiffCount[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // `tabSize` scores
113 SpacesDiffResult tmp;
114
115 for (int lineNumber = 0; lineNumber < linesCount; lineNumber++) {
116 const QString currentLineText = m_doc->line(line: lineNumber);
117 const int currentLineLength = currentLineText.length();
118
119 bool currentLineHasContent = false; // does `currentLineText` contain non-whitespace chars
120 int currentLineIndentation = 0; // index at which `currentLineText` contains the first non-whitespace char
121 int currentLineSpacesCount = 0; // count of spaces found in `currentLineText` indentation
122 int currentLineTabsCount = 0; // count of tabs found in `currentLineText` indentation
123 for (int j = 0, lenJ = currentLineLength; j < lenJ; j++) {
124 const auto charCode = currentLineText.at(i: j);
125
126 if (charCode == QLatin1Char('\t')) {
127 currentLineTabsCount++;
128 } else if (charCode == QLatin1Char(' ')) {
129 currentLineSpacesCount++;
130 } else {
131 // Hit non whitespace character on this line
132 currentLineHasContent = true;
133 currentLineIndentation = j;
134 break;
135 }
136 }
137
138 // Ignore empty or only whitespace lines
139 if (!currentLineHasContent) {
140 continue;
141 }
142
143 if (currentLineTabsCount > 0) {
144 linesIndentedWithTabsCount++;
145 } else if (currentLineSpacesCount > 1) {
146 linesIndentedWithSpacesCount++;
147 }
148
149 tmp = spacesDiff(a: previousLineText, aLength: previousLineIndentation, b: currentLineText, bLength: currentLineIndentation);
150
151 if (tmp.looksLikeAlignment) {
152 // if defaultInsertSpaces === true && the spaces count == tabSize, we may want to count it as valid indentation
153 //
154 // - item1
155 // - item2
156 //
157 // otherwise skip this line entirely
158 //
159 // const a = 1,
160 // b = 2;
161
162 if (!(defaultInsertSpaces && defaultTabSize == tmp.spacesDiff)) {
163 continue;
164 }
165 }
166
167 const int currentSpacesDiff = tmp.spacesDiff;
168 if (currentSpacesDiff <= MAX_ALLOWED_TAB_SIZE_GUESS) {
169 spacesDiffCount[currentSpacesDiff]++;
170 }
171
172 previousLineText = currentLineText;
173 previousLineIndentation = currentLineIndentation;
174 }
175
176 bool insertSpaces = defaultInsertSpaces;
177 if (linesIndentedWithTabsCount != linesIndentedWithSpacesCount) {
178 insertSpaces = (linesIndentedWithTabsCount < linesIndentedWithSpacesCount);
179 }
180
181 int tabSize = defaultTabSize;
182
183 // Guess tabSize only if inserting spaces...
184 if (insertSpaces) {
185 int tabSizeScore = 0;
186 for (int i = 0; i < 7; ++i) {
187 int possibleTabSize = ALLOWED_TAB_SIZE_GUESSES[i];
188 const int possibleTabSizeScore = spacesDiffCount[possibleTabSize];
189 if (possibleTabSizeScore > tabSizeScore) {
190 tabSizeScore = possibleTabSizeScore;
191 tabSize = possibleTabSize;
192 }
193 }
194
195 // Let a tabSize of 2 win even if it is not the maximum
196 // (only in case 4 was guessed)
197 if (tabSize == 4 && spacesDiffCount[4] > 0 && spacesDiffCount[2] > 0 && spacesDiffCount[2] >= spacesDiffCount[4] / 2) {
198 tabSize = 2;
199 }
200
201 // If no indent detected, check if the file is 1 space indented
202 if (tabSizeScore == 0) {
203 const auto it = std::max_element(first: spacesDiffCount, last: spacesDiffCount + 9);
204 const auto maxIdx = std::distance(first: spacesDiffCount, last: it);
205 if (maxIdx == 1) {
206 tabSize = 1;
207 }
208 }
209 }
210
211 return {.indentWidth: tabSize, .indentUsingSpaces: insertSpaces};
212}
213

// Source: ktexteditor/src/utils/kateindentdetecter.cpp