/*
    SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>

    SPDX-License-Identifier: MIT
*/
6
#include "abstracthighlighter.h"
#include "abstracthighlighter_p.h"
#include "context_p.h"
#include "definition_p.h"
#include "foldingregion.h"
#include "format.h"
#include "ksyntaxhighlighting_logging.h"
#include "repository.h"
#include "repository_p.h"
#include "rule_p.h"
#include "state.h"
#include "state_p.h"
#include "theme.h"

#include <algorithm>
#include <utility>

using namespace KSyntaxHighlighting;
22
23AbstractHighlighterPrivate::AbstractHighlighterPrivate()
24{
25}
26
27AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
28{
29}
30
31void AbstractHighlighterPrivate::ensureDefinitionLoaded()
32{
33 auto defData = DefinitionData::get(def: m_definition);
34 if (Q_UNLIKELY(!m_definition.isValid())) {
35 if (defData->repo && !defData->name.isEmpty()) {
36 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
37 m_definition = defData->repo->definitionForName(defName: defData->name);
38 defData = DefinitionData::get(def: m_definition);
39 }
40
41 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
42 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
43 }
44 }
45
46 if (m_definition.isValid()) {
47 defData->load();
48 }
49}
50
51AbstractHighlighter::AbstractHighlighter()
52 : d_ptr(new AbstractHighlighterPrivate)
53{
54}
55
56AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
57 : d_ptr(dd)
58{
59}
60
61AbstractHighlighter::~AbstractHighlighter()
62{
63 delete d_ptr;
64}
65
66Definition AbstractHighlighter::definition() const
67{
68 return d_ptr->m_definition;
69}
70
71void AbstractHighlighter::setDefinition(const Definition &def)
72{
73 Q_D(AbstractHighlighter);
74 d->m_definition = def;
75}
76
77Theme AbstractHighlighter::theme() const
78{
79 Q_D(const AbstractHighlighter);
80 return d->m_theme;
81}
82
83void AbstractHighlighter::setTheme(const Theme &theme)
84{
85 Q_D(AbstractHighlighter);
86 d->m_theme = theme;
87}
88
89/**
90 * Returns the index of the first non-space character. If the line is empty,
91 * or only contains white spaces, text.size() is returned.
92 */
93static inline int firstNonSpaceChar(QStringView text)
94{
95 for (int i = 0; i < text.length(); ++i) {
96 if (!text[i].isSpace()) {
97 return i;
98 }
99 }
100 return text.size();
101}
102
103State AbstractHighlighter::highlightLine(QStringView text, const State &state)
104{
105 Q_D(AbstractHighlighter);
106
107 // verify definition, deal with no highlighting being enabled
108 d->ensureDefinitionLoaded();
109 const auto defData = DefinitionData::get(def: d->m_definition);
110 if (!d->m_definition.isValid() || !defData->isLoaded()) {
111 applyFormat(offset: 0, length: text.size(), format: Format());
112 return State();
113 }
114
115 // limit the cache for unification to some reasonable size
116 // we use here at the moment 64k elements to not hog too much memory
117 // and to make the clearing no big stall
118 if (defData->unify.size() > 64 * 1024)
119 defData->unify.clear();
120
121 // verify/initialize state
122 auto newState = state;
123 auto stateData = StateData::get(state: newState);
124 bool isSharedData = true;
125 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
126 qCDebug(Log) << "Got invalid state, resetting.";
127 stateData = nullptr;
128 }
129 if (Q_UNLIKELY(!stateData)) {
130 stateData = StateData::reset(state&: newState);
131 stateData->push(context: defData->initialContext(), captures: QStringList());
132 stateData->m_defId = defData->id;
133 isSharedData = false;
134 }
135
136 // process empty lines
137 if (Q_UNLIKELY(text.isEmpty())) {
138 /**
139 * handle line empty context switches
140 * guard against endless loops
141 * see https://phabricator.kde.org/D18509
142 */
143 int endlessLoopingCounter = 0;
144 while (!stateData->topContext()->lineEmptyContext().isStay()) {
145 /**
146 * line empty context switches
147 */
148 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEmptyContext(), captures: QStringList(), state&: newState, isSharedData)) {
149 /**
150 * end when trying to #pop the main context
151 */
152 break;
153 }
154
155 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
156 break;
157 }
158
159 // guard against endless loops
160 ++endlessLoopingCounter;
161 if (endlessLoopingCounter > 1024) {
162 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
163 break;
164 }
165 }
166 auto context = stateData->topContext();
167 applyFormat(offset: 0, length: 0, format: context->attributeFormat());
168 return *defData->unify.insert(value: newState);
169 }
170
171 auto &dynamicRegexpCache = RepositoryPrivate::get(repo: defData->repo)->m_dynamicRegexpCache;
172
173 int offset = 0;
174 int beginOffset = 0;
175 bool lineContinuation = false;
176
177 /**
178 * for expensive rules like regexes we do:
179 * - match them for the complete line, as this is faster than re-trying them at all positions
180 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
181 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
182 */
183 QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets;
184 QStringList capturesForLastDynamicSkipOffset;
185
186 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
187 auto i = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
188 return v.first == r;
189 });
190 if (i == skipOffsets.end())
191 return 0;
192 return i->second;
193 };
194
195 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
196 auto it = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
197 return v.first == r;
198 });
199 if (it == skipOffsets.end()) {
200 skipOffsets.push_back(t: {r, i});
201 } else {
202 it->second = i;
203 }
204 };
205
206 /**
207 * current active format
208 * stored as pointer to avoid deconstruction/constructions inside the internal loop
209 * the pointers are stable, the formats are either in the contexts or rules
210 */
211 auto currentFormat = &stateData->topContext()->attributeFormat();
212
213 /**
214 * cached first non-space character, needs to be computed if < 0
215 */
216 int firstNonSpace = -1;
217 int lastOffset = offset;
218 int endlessLoopingCounter = 0;
219 do {
220 /**
221 * avoid that we loop endless for some broken hl definitions
222 */
223 if (lastOffset == offset) {
224 ++endlessLoopingCounter;
225 if (endlessLoopingCounter > 1024) {
226 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
227 break;
228 }
229 } else {
230 // ensure we made progress, clear the endlessLoopingCounter
231 Q_ASSERT(offset > lastOffset);
232 lastOffset = offset;
233 endlessLoopingCounter = 0;
234 }
235
236 /**
237 * try to match all rules in the context in order of declaration in XML
238 */
239 bool isLookAhead = false;
240 int newOffset = 0;
241 const Format *newFormat = nullptr;
242 for (const auto &ruleShared : stateData->topContext()->rules()) {
243 auto rule = ruleShared.get();
244 /**
245 * filter out rules that require a specific column
246 */
247 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
248 continue;
249 }
250
251 /**
252 * filter out rules that only match for leading whitespace
253 */
254 if (rule->firstNonSpace()) {
255 /**
256 * compute the first non-space lazy
257 * avoids computing it for contexts without any such rules
258 */
259 if (firstNonSpace < 0) {
260 firstNonSpace = firstNonSpaceChar(text);
261 }
262
263 /**
264 * can we skip?
265 */
266 if (offset > firstNonSpace) {
267 continue;
268 }
269 }
270
271 int currentSkipOffset = 0;
272 if (Q_UNLIKELY(rule->hasSkipOffset())) {
273 /**
274 * shall we skip application of this rule? two cases:
275 * - rule can't match at all => currentSkipOffset < 0
276 * - rule will only match for some higher offset => currentSkipOffset > offset
277 *
278 * we need to invalidate this if we are dynamic and have different captures then last time
279 */
280 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
281 skipOffsets.clear();
282 } else {
283 currentSkipOffset = getSkipOffsetValue(rule);
284 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
285 continue;
286 }
287 }
288 }
289
290 auto newResult = rule->doMatch(text, offset, captures: stateData->topCaptures(), dynamicRegexpCache);
291 newOffset = newResult.offset();
292
293 /**
294 * update skip offset if new one rules out any later match or is larger than current one
295 */
296 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
297 insertSkipOffset(rule, newResult.skipOffset());
298
299 // remember new captures, if dynamic to enforce proper reset above on change!
300 if (rule->isDynamic()) {
301 capturesForLastDynamicSkipOffset = stateData->topCaptures();
302 }
303 }
304
305 if (newOffset <= offset) {
306 continue;
307 }
308
309 /**
310 * apply folding.
311 * special cases:
312 * - rule with endRegion + beginRegion: in endRegion, the length is 0
313 * - rule with lookAhead: length is 0
314 */
315 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
316 applyFolding(offset, length: 0, region: rule->endRegion());
317 } else if (rule->endRegion().isValid()) {
318 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->endRegion());
319 }
320 if (rule->beginRegion().isValid()) {
321 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->beginRegion());
322 }
323
324 if (rule->isLookAhead()) {
325 Q_ASSERT(!rule->context().isStay());
326 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
327 isLookAhead = true;
328 break;
329 }
330
331 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
332 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
333 if (newOffset == text.size() && rule->isLineContinue()) {
334 lineContinuation = true;
335 }
336 break;
337 }
338 if (isLookAhead) {
339 continue;
340 }
341
342 if (newOffset <= offset) { // no matching rule
343 if (stateData->topContext()->fallthrough()) {
344 d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->fallthroughContext(), captures: QStringList(), state&: newState, isSharedData);
345 continue;
346 }
347
348 newOffset = offset + 1;
349 newFormat = &stateData->topContext()->attributeFormat();
350 }
351
352 /**
353 * if we arrive here, some new format has to be set!
354 */
355 Q_ASSERT(newFormat);
356
357 /**
358 * on format change, apply the last one and switch to new one
359 */
360 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
361 if (offset > 0) {
362 applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat);
363 }
364 beginOffset = offset;
365 currentFormat = newFormat;
366 }
367
368 /**
369 * we must have made progress if we arrive here!
370 */
371 Q_ASSERT(newOffset > offset);
372 offset = newOffset;
373
374 } while (offset < text.size());
375
376 /**
377 * apply format for remaining text, if any
378 */
379 if (beginOffset < offset) {
380 applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat);
381 }
382
383 /**
384 * handle line end context switches
385 * guard against endless loops
386 * see https://phabricator.kde.org/D18509
387 */
388 {
389 int endlessLoopingCounter = 0;
390 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
391 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEndContext(), captures: QStringList(), state&: newState, isSharedData)) {
392 break;
393 }
394
395 // guard against endless loops
396 ++endlessLoopingCounter;
397 if (endlessLoopingCounter > 1024) {
398 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
399 break;
400 }
401 }
402 }
403
404 return *defData->unify.insert(value: newState);
405}
406
407bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
408{
409 const auto popCount = contextSwitch.popCount();
410 const auto context = contextSwitch.context();
411 if (popCount <= 0 && !context) {
412 return true;
413 }
414
415 // a modified state must be detached before modification
416 if (isSharedData) {
417 data = StateData::detach(state);
418 isSharedData = false;
419 }
420
421 // kill as many items as requested from the stack, will always keep the initial context alive!
422 const bool initialContextSurvived = data->pop(popCount);
423
424 // if we have a new context to add, push it
425 // then we always "succeed"
426 if (context) {
427 data->push(context, captures: std::move(captures));
428 return true;
429 }
430
431 // else we abort, if we did try to pop the initial context
432 return initialContextSurvived;
433}
434
435void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
436{
437 Q_UNUSED(offset);
438 Q_UNUSED(length);
439 Q_UNUSED(region);
440}
441

// source code of syntax-highlighting/src/lib/abstracthighlighter.cpp