/*
    SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>

    SPDX-License-Identifier: MIT
*/
6
7#include "abstracthighlighter.h"
8#include "abstracthighlighter_p.h"
9#include "context_p.h"
10#include "definition_p.h"
11#include "foldingregion.h"
12#include "format.h"
13#include "ksyntaxhighlighting_logging.h"
14#include "repository.h"
15#include "rule_p.h"
16#include "state.h"
17#include "state_p.h"
18#include "theme.h"
19
20using namespace KSyntaxHighlighting;
21
22AbstractHighlighterPrivate::AbstractHighlighterPrivate()
23{
24}
25
26AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
27{
28}
29
30void AbstractHighlighterPrivate::ensureDefinitionLoaded()
31{
32 auto defData = DefinitionData::get(def: m_definition);
33 if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) {
34 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
35 m_definition = defData->repo->definitionForName(m_definition.name());
36 defData = DefinitionData::get(def: m_definition);
37 }
38
39 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
40 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
41 }
42
43 if (m_definition.isValid()) {
44 defData->load();
45 }
46}
47
48AbstractHighlighter::AbstractHighlighter()
49 : d_ptr(new AbstractHighlighterPrivate)
50{
51}
52
53AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
54 : d_ptr(dd)
55{
56}
57
58AbstractHighlighter::~AbstractHighlighter()
59{
60 delete d_ptr;
61}
62
63Definition AbstractHighlighter::definition() const
64{
65 return d_ptr->m_definition;
66}
67
68void AbstractHighlighter::setDefinition(const Definition &def)
69{
70 Q_D(AbstractHighlighter);
71 d->m_definition = def;
72}
73
74Theme AbstractHighlighter::theme() const
75{
76 Q_D(const AbstractHighlighter);
77 return d->m_theme;
78}
79
80void AbstractHighlighter::setTheme(const Theme &theme)
81{
82 Q_D(AbstractHighlighter);
83 d->m_theme = theme;
84}
85
86/**
87 * Returns the index of the first non-space character. If the line is empty,
88 * or only contains white spaces, text.size() is returned.
89 */
90static inline int firstNonSpaceChar(QStringView text)
91{
92 for (int i = 0; i < text.length(); ++i) {
93 if (!text[i].isSpace()) {
94 return i;
95 }
96 }
97 return text.size();
98}
99
100State AbstractHighlighter::highlightLine(QStringView text, const State &state)
101{
102 Q_D(AbstractHighlighter);
103
104 // verify definition, deal with no highlighting being enabled
105 d->ensureDefinitionLoaded();
106 const auto defData = DefinitionData::get(d->m_definition);
107 if (!d->m_definition.isValid() || !defData->isLoaded()) {
108 applyFormat(offset: 0, length: text.size(), format: Format());
109 return State();
110 }
111
112 // limit the cache for unification to some reasonable size
113 // we use here at the moment 64k elements to not hog too much memory
114 // and to make the clearing no big stall
115 if (defData->unify.size() > 64 * 1024)
116 defData->unify.clear();
117
118 // verify/initialize state
119 auto newState = state;
120 auto stateData = StateData::get(state: newState);
121 bool isSharedData = true;
122 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
123 qCDebug(Log) << "Got invalid state, resetting.";
124 stateData = nullptr;
125 }
126 if (Q_UNLIKELY(!stateData)) {
127 stateData = StateData::reset(state&: newState);
128 stateData->push(defData->initialContext(), QStringList());
129 stateData->m_defId = defData->id;
130 isSharedData = false;
131 }
132
133 // process empty lines
134 if (Q_UNLIKELY(text.isEmpty())) {
135 /**
136 * handle line empty context switches
137 * guard against endless loops
138 * see https://phabricator.kde.org/D18509
139 */
140 int endlessLoopingCounter = 0;
141 while (!stateData->topContext()->lineEmptyContext().isStay()) {
142 /**
143 * line empty context switches
144 */
145 if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) {
146 /**
147 * end when trying to #pop the main context
148 */
149 break;
150 }
151
152 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
153 break;
154 }
155
156 // guard against endless loops
157 ++endlessLoopingCounter;
158 if (endlessLoopingCounter > 1024) {
159 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
160 break;
161 }
162 }
163 auto context = stateData->topContext();
164 applyFormat(offset: 0, length: 0, format: context->attributeFormat());
165 return *defData->unify.insert(newState);
166 }
167
168 int offset = 0;
169 int beginOffset = 0;
170 bool lineContinuation = false;
171
172 /**
173 * for expensive rules like regexes we do:
174 * - match them for the complete line, as this is faster than re-trying them at all positions
175 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
176 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
177 */
178 QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets;
179 QStringList capturesForLastDynamicSkipOffset;
180
181 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
182 auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
183 return v.first == r;
184 });
185 if (i == skipOffsets.end())
186 return 0;
187 return i->second;
188 };
189
190 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
191 auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
192 return v.first == r;
193 });
194 if (it == skipOffsets.end()) {
195 skipOffsets.push_back({r, i});
196 } else {
197 it->second = i;
198 }
199 };
200
201 /**
202 * current active format
203 * stored as pointer to avoid deconstruction/constructions inside the internal loop
204 * the pointers are stable, the formats are either in the contexts or rules
205 */
206 auto currentFormat = &stateData->topContext()->attributeFormat();
207
208 /**
209 * cached first non-space character, needs to be computed if < 0
210 */
211 int firstNonSpace = -1;
212 int lastOffset = offset;
213 int endlessLoopingCounter = 0;
214 do {
215 /**
216 * avoid that we loop endless for some broken hl definitions
217 */
218 if (lastOffset == offset) {
219 ++endlessLoopingCounter;
220 if (endlessLoopingCounter > 1024) {
221 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
222 break;
223 }
224 } else {
225 // ensure we made progress, clear the endlessLoopingCounter
226 Q_ASSERT(offset > lastOffset);
227 lastOffset = offset;
228 endlessLoopingCounter = 0;
229 }
230
231 /**
232 * try to match all rules in the context in order of declaration in XML
233 */
234 bool isLookAhead = false;
235 int newOffset = 0;
236 const Format *newFormat = nullptr;
237 for (const auto &rule : stateData->topContext()->rules()) {
238 /**
239 * filter out rules that require a specific column
240 */
241 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
242 continue;
243 }
244
245 /**
246 * filter out rules that only match for leading whitespace
247 */
248 if (rule->firstNonSpace()) {
249 /**
250 * compute the first non-space lazy
251 * avoids computing it for contexts without any such rules
252 */
253 if (firstNonSpace < 0) {
254 firstNonSpace = firstNonSpaceChar(text);
255 }
256
257 /**
258 * can we skip?
259 */
260 if (offset > firstNonSpace) {
261 continue;
262 }
263 }
264
265 int currentSkipOffset = 0;
266 if (Q_UNLIKELY(rule->hasSkipOffset())) {
267 /**
268 * shall we skip application of this rule? two cases:
269 * - rule can't match at all => currentSkipOffset < 0
270 * - rule will only match for some higher offset => currentSkipOffset > offset
271 *
272 * we need to invalidate this if we are dynamic and have different captures then last time
273 */
274 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
275 skipOffsets.clear();
276 } else {
277 currentSkipOffset = getSkipOffsetValue(rule.get());
278 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
279 continue;
280 }
281 }
282 }
283
284 auto newResult = rule->doMatch(text, offset, stateData->topCaptures());
285 newOffset = newResult.offset();
286
287 /**
288 * update skip offset if new one rules out any later match or is larger than current one
289 */
290 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
291 insertSkipOffset(rule.get(), newResult.skipOffset());
292
293 // remember new captures, if dynamic to enforce proper reset above on change!
294 if (rule->isDynamic()) {
295 capturesForLastDynamicSkipOffset = stateData->topCaptures();
296 }
297 }
298
299 if (newOffset <= offset) {
300 continue;
301 }
302
303 /**
304 * apply folding.
305 * special cases:
306 * - rule with endRegion + beginRegion: in endRegion, the length is 0
307 * - rule with lookAhead: length is 0
308 */
309 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
310 applyFolding(offset, 0, rule->endRegion());
311 } else if (rule->endRegion().isValid()) {
312 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion());
313 }
314 if (rule->beginRegion().isValid()) {
315 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion());
316 }
317
318 if (rule->isLookAhead()) {
319 Q_ASSERT(!rule->context().isStay());
320 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
321 isLookAhead = true;
322 break;
323 }
324
325 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
326 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
327 if (newOffset == text.size() && rule->isLineContinue()) {
328 lineContinuation = true;
329 }
330 break;
331 }
332 if (isLookAhead) {
333 continue;
334 }
335
336 if (newOffset <= offset) { // no matching rule
337 if (stateData->topContext()->fallthrough()) {
338 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData);
339 continue;
340 }
341
342 newOffset = offset + 1;
343 newFormat = &stateData->topContext()->attributeFormat();
344 }
345
346 /**
347 * if we arrive here, some new format has to be set!
348 */
349 Q_ASSERT(newFormat);
350
351 /**
352 * on format change, apply the last one and switch to new one
353 */
354 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
355 if (offset > 0) {
356 applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat);
357 }
358 beginOffset = offset;
359 currentFormat = newFormat;
360 }
361
362 /**
363 * we must have made progress if we arrive here!
364 */
365 Q_ASSERT(newOffset > offset);
366 offset = newOffset;
367
368 } while (offset < text.size());
369
370 /**
371 * apply format for remaining text, if any
372 */
373 if (beginOffset < offset) {
374 applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat);
375 }
376
377 /**
378 * handle line end context switches
379 * guard against endless loops
380 * see https://phabricator.kde.org/D18509
381 */
382 {
383 int endlessLoopingCounter = 0;
384 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
385 if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) {
386 break;
387 }
388
389 // guard against endless loops
390 ++endlessLoopingCounter;
391 if (endlessLoopingCounter > 1024) {
392 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
393 break;
394 }
395 }
396 }
397
398 return *defData->unify.insert(newState);
399}
400
401bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
402{
403 const auto popCount = contextSwitch.popCount();
404 const auto context = contextSwitch.context();
405 if (popCount <= 0 && !context) {
406 return true;
407 }
408
409 // a modified state must be detached before modification
410 if (isSharedData) {
411 data = StateData::detach(state);
412 isSharedData = false;
413 }
414
415 // kill as many items as requested from the stack, will always keep the initial context alive!
416 const bool initialContextSurvived = data->pop(popCount);
417
418 // if we have a new context to add, push it
419 // then we always "succeed"
420 if (context) {
421 data->push(context, std::move(captures));
422 return true;
423 }
424
425 // else we abort, if we did try to pop the initial context
426 return initialContextSurvived;
427}
428
429void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
430{
431 Q_UNUSED(offset);
432 Q_UNUSED(length);
433 Q_UNUSED(region);
434}
435

// source code of syntax-highlighting/src/lib/abstracthighlighter.cpp