1/*
2 SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "abstracthighlighter.h"
8#include "abstracthighlighter_p.h"
9#include "context_p.h"
10#include "definition_p.h"
11#include "foldingregion.h"
12#include "format.h"
13#include "ksyntaxhighlighting_logging.h"
14#include "repository.h"
15#include "repository_p.h"
16#include "rule_p.h"
17#include "state.h"
18#include "state_p.h"
19#include "theme.h"
20
21using namespace KSyntaxHighlighting;
22
23AbstractHighlighterPrivate::AbstractHighlighterPrivate()
24{
25}
26
27AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
28{
29}
30
31void AbstractHighlighterPrivate::ensureDefinitionLoaded()
32{
33 auto defData = DefinitionData::get(def: m_definition);
34 if (Q_UNLIKELY(!m_definition.isValid())) {
35 if (defData->repo && !defData->name.isEmpty()) {
36 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
37 m_definition = defData->repo->definitionForName(defName: defData->name);
38 defData = DefinitionData::get(def: m_definition);
39 }
40
41 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
42 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
43 }
44 }
45
46 if (m_definition.isValid()) {
47 defData->load();
48 }
49}
50
51AbstractHighlighter::AbstractHighlighter()
52 : d_ptr(new AbstractHighlighterPrivate)
53{
54}
55
56AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
57 : d_ptr(dd)
58{
59}
60
61AbstractHighlighter::~AbstractHighlighter()
62{
63 delete d_ptr;
64}
65
66Definition AbstractHighlighter::definition() const
67{
68 return d_ptr->m_definition;
69}
70
71void AbstractHighlighter::setDefinition(const Definition &def)
72{
73 Q_D(AbstractHighlighter);
74 d->m_definition = def;
75}
76
77Theme AbstractHighlighter::theme() const
78{
79 Q_D(const AbstractHighlighter);
80 return d->m_theme;
81}
82
83void AbstractHighlighter::setTheme(const Theme &theme)
84{
85 Q_D(AbstractHighlighter);
86 d->m_theme = theme;
87}
88
89/**
90 * Returns the index of the first non-space character. If the line is empty,
91 * or only contains white spaces, text.size() is returned.
92 */
93static inline int firstNonSpaceChar(QStringView text)
94{
95 for (int i = 0; i < text.length(); ++i) {
96 if (!text[i].isSpace()) {
97 return i;
98 }
99 }
100 return text.size();
101}
102
103State AbstractHighlighter::highlightLine(QStringView text, const State &state)
104{
105 Q_D(AbstractHighlighter);
106
107 // verify definition, deal with no highlighting being enabled
108 d->ensureDefinitionLoaded();
109 const auto defData = DefinitionData::get(def: d->m_definition);
110 if (!d->m_definition.isValid() || !defData->isLoaded()) {
111 applyFormat(offset: 0, length: text.size(), format: Format());
112 return State();
113 }
114
115 // limit the cache for unification to some reasonable size
116 // we use here at the moment 64k elements to not hog too much memory
117 // and to make the clearing no big stall
118 if (defData->unify.size() > 64 * 1024)
119 defData->unify.clear();
120
121 // verify/initialize state
122 auto newState = state;
123 auto stateData = StateData::get(state: newState);
124 bool isSharedData = true;
125 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
126 qCDebug(Log) << "Got invalid state, resetting.";
127 stateData = nullptr;
128 }
129 if (Q_UNLIKELY(!stateData)) {
130 stateData = StateData::reset(state&: newState);
131 auto *initialContext = defData->initialContext();
132 stateData->push(firstContext: &initialContext, lastContext: &initialContext + 1, captures: QStringList());
133 stateData->m_defId = defData->id;
134 isSharedData = false;
135 }
136
137 // process empty lines
138 if (Q_UNLIKELY(text.isEmpty())) {
139 /**
140 * handle line empty context switches
141 * guard against endless loops
142 * see https://phabricator.kde.org/D18509
143 */
144 int endlessLoopingCounter = 0;
145 while (!stateData->topContext()->lineEmptyContext().isStay()) {
146 /**
147 * line empty context switches
148 */
149 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEmptyContext(), captures: QStringList(), state&: newState, isSharedData)) {
150 /**
151 * end when trying to #pop the main context
152 */
153 break;
154 }
155
156 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
157 break;
158 }
159
160 // guard against endless loops
161 ++endlessLoopingCounter;
162 if (endlessLoopingCounter > 1024) {
163 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
164 break;
165 }
166 }
167 auto context = stateData->topContext();
168 applyFormat(offset: 0, length: 0, format: context->attributeFormat());
169 return *defData->unify.insert(value: newState);
170 }
171
172 auto &dynamicRegexpCache = RepositoryPrivate::get(repo: defData->repo)->m_dynamicRegexpCache;
173
174 int offset = 0;
175 int beginOffset = 0;
176 bool lineContinuation = false;
177
178 /**
179 * for expensive rules like regexes we do:
180 * - match them for the complete line, as this is faster than re-trying them at all positions
181 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
182 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
183 */
184 QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets;
185 QStringList capturesForLastDynamicSkipOffset;
186
187 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
188 auto i = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
189 return v.first == r;
190 });
191 if (i == skipOffsets.end())
192 return 0;
193 return i->second;
194 };
195
196 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
197 auto it = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
198 return v.first == r;
199 });
200 if (it == skipOffsets.end()) {
201 skipOffsets.push_back(t: {r, i});
202 } else {
203 it->second = i;
204 }
205 };
206
207 /**
208 * current active format
209 * stored as pointer to avoid deconstruction/constructions inside the internal loop
210 * the pointers are stable, the formats are either in the contexts or rules
211 */
212 auto currentFormat = &stateData->topContext()->attributeFormat();
213
214 /**
215 * cached first non-space character, needs to be computed if < 0
216 */
217 int firstNonSpace = -1;
218 int lastOffset = offset;
219 int endlessLoopingCounter = 0;
220 do {
221 /**
222 * avoid that we loop endless for some broken hl definitions
223 */
224 if (lastOffset == offset) {
225 ++endlessLoopingCounter;
226 if (endlessLoopingCounter > 1024) {
227 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
228 break;
229 }
230 } else {
231 // ensure we made progress, clear the endlessLoopingCounter
232 Q_ASSERT(offset > lastOffset);
233 lastOffset = offset;
234 endlessLoopingCounter = 0;
235 }
236
237 /**
238 * try to match all rules in the context in order of declaration in XML
239 */
240 bool isLookAhead = false;
241 int newOffset = 0;
242 const Format *newFormat = nullptr;
243 for (const auto &ruleShared : stateData->topContext()->rules()) {
244 auto rule = ruleShared.get();
245 /**
246 * filter out rules that require a specific column
247 */
248 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
249 continue;
250 }
251
252 /**
253 * filter out rules that only match for leading whitespace
254 */
255 if (rule->firstNonSpace()) {
256 /**
257 * compute the first non-space lazy
258 * avoids computing it for contexts without any such rules
259 */
260 if (firstNonSpace < 0) {
261 firstNonSpace = firstNonSpaceChar(text);
262 }
263
264 /**
265 * can we skip?
266 */
267 if (offset > firstNonSpace) {
268 continue;
269 }
270 }
271
272 int currentSkipOffset = 0;
273 if (Q_UNLIKELY(rule->hasSkipOffset())) {
274 /**
275 * shall we skip application of this rule? two cases:
276 * - rule can't match at all => currentSkipOffset < 0
277 * - rule will only match for some higher offset => currentSkipOffset > offset
278 *
279 * we need to invalidate this if we are dynamic and have different captures then last time
280 */
281 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
282 skipOffsets.clear();
283 } else {
284 currentSkipOffset = getSkipOffsetValue(rule);
285 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
286 continue;
287 }
288 }
289 }
290
291 auto newResult = rule->doMatch(text, offset, captures: stateData->topCaptures(), dynamicRegexpCache);
292 newOffset = newResult.offset();
293
294 /**
295 * update skip offset if new one rules out any later match or is larger than current one
296 */
297 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
298 insertSkipOffset(rule, newResult.skipOffset());
299
300 // remember new captures, if dynamic to enforce proper reset above on change!
301 if (rule->isDynamic()) {
302 capturesForLastDynamicSkipOffset = stateData->topCaptures();
303 }
304 }
305
306 if (newOffset <= offset) {
307 continue;
308 }
309
310 /**
311 * apply folding.
312 * special cases:
313 * - rule with endRegion + beginRegion: in endRegion, the length is 0
314 * - rule with lookAhead: length is 0
315 */
316 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
317 applyFolding(offset, length: 0, region: rule->endRegion());
318 } else if (rule->endRegion().isValid()) {
319 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->endRegion());
320 }
321 if (rule->beginRegion().isValid()) {
322 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->beginRegion());
323 }
324
325 if (rule->isLookAhead()) {
326 Q_ASSERT(!rule->context().isStay());
327 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
328 isLookAhead = true;
329 break;
330 }
331
332 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
333 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
334 if (newOffset == text.size() && rule->isLineContinue()) {
335 lineContinuation = true;
336 }
337 break;
338 }
339 if (isLookAhead) {
340 continue;
341 }
342
343 if (newOffset <= offset) { // no matching rule
344 if (stateData->topContext()->fallthrough()) {
345 d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->fallthroughContext(), captures: QStringList(), state&: newState, isSharedData);
346 continue;
347 }
348
349 newOffset = offset + 1;
350 newFormat = &stateData->topContext()->attributeFormat();
351 }
352
353 /**
354 * if we arrive here, some new format has to be set!
355 */
356 Q_ASSERT(newFormat);
357
358 /**
359 * on format change, apply the last one and switch to new one
360 */
361 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
362 if (offset > 0) {
363 applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat);
364 }
365 beginOffset = offset;
366 currentFormat = newFormat;
367 }
368
369 /**
370 * we must have made progress if we arrive here!
371 */
372 Q_ASSERT(newOffset > offset);
373 offset = newOffset;
374
375 } while (offset < text.size());
376
377 /**
378 * apply format for remaining text, if any
379 */
380 if (beginOffset < offset) {
381 applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat);
382 }
383
384 /**
385 * handle line end context switches
386 * guard against endless loops
387 * see https://phabricator.kde.org/D18509
388 */
389 {
390 int endlessLoopingCounter = 0;
391 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
392 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEndContext(), captures: QStringList(), state&: newState, isSharedData)) {
393 break;
394 }
395
396 // guard against endless loops
397 ++endlessLoopingCounter;
398 if (endlessLoopingCounter > 1024) {
399 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
400 break;
401 }
402 }
403 }
404
405 return *defData->unify.insert(value: newState);
406}
407
408bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
409{
410 if (contextSwitch.isStay()) {
411 return true;
412 }
413
414 // a modified state must be detached before modification
415 if (isSharedData) {
416 data = StateData::detach(state);
417 isSharedData = false;
418 }
419
420 const auto &contexts = contextSwitch.contexts();
421
422 // kill as many items as requested from the stack, will always keep the initial context alive!
423 const bool initialContextSurvived = data->pop(popCount: contextSwitch.popCount());
424
425 // if we have new contexts to add, push it
426 // then we always "succeed"
427 if (!contexts.isEmpty()) {
428 data->push(firstContext: contexts.begin(), lastContext: contexts.end(), captures: std::move(captures));
429 return true;
430 }
431
432 // else we abort, if we did try to pop the initial context
433 return initialContextSurvived;
434}
435
436void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
437{
438 Q_UNUSED(offset);
439 Q_UNUSED(length);
440 Q_UNUSED(region);
441}
442

// source code of syntax-highlighting/src/lib/abstracthighlighter.cpp