1/*
2 SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "abstracthighlighter.h"
8#include "abstracthighlighter_p.h"
9#include "context_p.h"
10#include "definition_p.h"
11#include "foldingregion.h"
12#include "format.h"
13#include "ksyntaxhighlighting_logging.h"
14#include "repository.h"
15#include "repository_p.h"
16#include "rule_p.h"
17#include "state.h"
18#include "state_p.h"
19#include "theme.h"
20
21using namespace KSyntaxHighlighting;
22
23AbstractHighlighterPrivate::AbstractHighlighterPrivate()
24{
25}
26
27AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
28{
29}
30
31void AbstractHighlighterPrivate::ensureDefinitionLoaded()
32{
33 auto defData = DefinitionData::get(def: m_definition);
34 if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) {
35 qCDebug(Log) << "Definition became invalid, trying re-lookup.";
36 m_definition = defData->repo->definitionForName(defName: m_definition.name());
37 defData = DefinitionData::get(def: m_definition);
38 }
39
40 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
41 qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
42 }
43
44 if (m_definition.isValid()) {
45 defData->load();
46 }
47}
48
49AbstractHighlighter::AbstractHighlighter()
50 : d_ptr(new AbstractHighlighterPrivate)
51{
52}
53
54AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
55 : d_ptr(dd)
56{
57}
58
59AbstractHighlighter::~AbstractHighlighter()
60{
61 delete d_ptr;
62}
63
64Definition AbstractHighlighter::definition() const
65{
66 return d_ptr->m_definition;
67}
68
69void AbstractHighlighter::setDefinition(const Definition &def)
70{
71 Q_D(AbstractHighlighter);
72 d->m_definition = def;
73}
74
75Theme AbstractHighlighter::theme() const
76{
77 Q_D(const AbstractHighlighter);
78 return d->m_theme;
79}
80
81void AbstractHighlighter::setTheme(const Theme &theme)
82{
83 Q_D(AbstractHighlighter);
84 d->m_theme = theme;
85}
86
87/**
88 * Returns the index of the first non-space character. If the line is empty,
89 * or only contains white spaces, text.size() is returned.
90 */
91static inline int firstNonSpaceChar(QStringView text)
92{
93 for (int i = 0; i < text.length(); ++i) {
94 if (!text[i].isSpace()) {
95 return i;
96 }
97 }
98 return text.size();
99}
100
101State AbstractHighlighter::highlightLine(QStringView text, const State &state)
102{
103 Q_D(AbstractHighlighter);
104
105 // verify definition, deal with no highlighting being enabled
106 d->ensureDefinitionLoaded();
107 const auto defData = DefinitionData::get(def: d->m_definition);
108 if (!d->m_definition.isValid() || !defData->isLoaded()) {
109 applyFormat(offset: 0, length: text.size(), format: Format());
110 return State();
111 }
112
113 // limit the cache for unification to some reasonable size
114 // we use here at the moment 64k elements to not hog too much memory
115 // and to make the clearing no big stall
116 if (defData->unify.size() > 64 * 1024)
117 defData->unify.clear();
118
119 // verify/initialize state
120 auto newState = state;
121 auto stateData = StateData::get(state: newState);
122 bool isSharedData = true;
123 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
124 qCDebug(Log) << "Got invalid state, resetting.";
125 stateData = nullptr;
126 }
127 if (Q_UNLIKELY(!stateData)) {
128 stateData = StateData::reset(state&: newState);
129 stateData->push(context: defData->initialContext(), captures: QStringList());
130 stateData->m_defId = defData->id;
131 isSharedData = false;
132 }
133
134 // process empty lines
135 if (Q_UNLIKELY(text.isEmpty())) {
136 /**
137 * handle line empty context switches
138 * guard against endless loops
139 * see https://phabricator.kde.org/D18509
140 */
141 int endlessLoopingCounter = 0;
142 while (!stateData->topContext()->lineEmptyContext().isStay()) {
143 /**
144 * line empty context switches
145 */
146 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEmptyContext(), captures: QStringList(), state&: newState, isSharedData)) {
147 /**
148 * end when trying to #pop the main context
149 */
150 break;
151 }
152
153 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
154 break;
155 }
156
157 // guard against endless loops
158 ++endlessLoopingCounter;
159 if (endlessLoopingCounter > 1024) {
160 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
161 break;
162 }
163 }
164 auto context = stateData->topContext();
165 applyFormat(offset: 0, length: 0, format: context->attributeFormat());
166 return *defData->unify.insert(value: newState);
167 }
168
169 auto &dynamicRegexpCache = RepositoryPrivate::get(repo: defData->repo)->m_dynamicRegexpCache;
170
171 int offset = 0;
172 int beginOffset = 0;
173 bool lineContinuation = false;
174
175 /**
176 * for expensive rules like regexes we do:
177 * - match them for the complete line, as this is faster than re-trying them at all positions
178 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
179 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
180 */
181 QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets;
182 QStringList capturesForLastDynamicSkipOffset;
183
184 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
185 auto i = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
186 return v.first == r;
187 });
188 if (i == skipOffsets.end())
189 return 0;
190 return i->second;
191 };
192
193 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
194 auto it = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) {
195 return v.first == r;
196 });
197 if (it == skipOffsets.end()) {
198 skipOffsets.push_back(t: {r, i});
199 } else {
200 it->second = i;
201 }
202 };
203
204 /**
205 * current active format
206 * stored as pointer to avoid deconstruction/constructions inside the internal loop
207 * the pointers are stable, the formats are either in the contexts or rules
208 */
209 auto currentFormat = &stateData->topContext()->attributeFormat();
210
211 /**
212 * cached first non-space character, needs to be computed if < 0
213 */
214 int firstNonSpace = -1;
215 int lastOffset = offset;
216 int endlessLoopingCounter = 0;
217 do {
218 /**
219 * avoid that we loop endless for some broken hl definitions
220 */
221 if (lastOffset == offset) {
222 ++endlessLoopingCounter;
223 if (endlessLoopingCounter > 1024) {
224 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
225 break;
226 }
227 } else {
228 // ensure we made progress, clear the endlessLoopingCounter
229 Q_ASSERT(offset > lastOffset);
230 lastOffset = offset;
231 endlessLoopingCounter = 0;
232 }
233
234 /**
235 * try to match all rules in the context in order of declaration in XML
236 */
237 bool isLookAhead = false;
238 int newOffset = 0;
239 const Format *newFormat = nullptr;
240 for (const auto &ruleShared : stateData->topContext()->rules()) {
241 auto rule = ruleShared.get();
242 /**
243 * filter out rules that require a specific column
244 */
245 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
246 continue;
247 }
248
249 /**
250 * filter out rules that only match for leading whitespace
251 */
252 if (rule->firstNonSpace()) {
253 /**
254 * compute the first non-space lazy
255 * avoids computing it for contexts without any such rules
256 */
257 if (firstNonSpace < 0) {
258 firstNonSpace = firstNonSpaceChar(text);
259 }
260
261 /**
262 * can we skip?
263 */
264 if (offset > firstNonSpace) {
265 continue;
266 }
267 }
268
269 int currentSkipOffset = 0;
270 if (Q_UNLIKELY(rule->hasSkipOffset())) {
271 /**
272 * shall we skip application of this rule? two cases:
273 * - rule can't match at all => currentSkipOffset < 0
274 * - rule will only match for some higher offset => currentSkipOffset > offset
275 *
276 * we need to invalidate this if we are dynamic and have different captures then last time
277 */
278 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
279 skipOffsets.clear();
280 } else {
281 currentSkipOffset = getSkipOffsetValue(rule);
282 if (currentSkipOffset < 0 || currentSkipOffset > offset) {
283 continue;
284 }
285 }
286 }
287
288 auto newResult = rule->doMatch(text, offset, captures: stateData->topCaptures(), dynamicRegexpCache);
289 newOffset = newResult.offset();
290
291 /**
292 * update skip offset if new one rules out any later match or is larger than current one
293 */
294 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
295 insertSkipOffset(rule, newResult.skipOffset());
296
297 // remember new captures, if dynamic to enforce proper reset above on change!
298 if (rule->isDynamic()) {
299 capturesForLastDynamicSkipOffset = stateData->topCaptures();
300 }
301 }
302
303 if (newOffset <= offset) {
304 continue;
305 }
306
307 /**
308 * apply folding.
309 * special cases:
310 * - rule with endRegion + beginRegion: in endRegion, the length is 0
311 * - rule with lookAhead: length is 0
312 */
313 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
314 applyFolding(offset, length: 0, region: rule->endRegion());
315 } else if (rule->endRegion().isValid()) {
316 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->endRegion());
317 }
318 if (rule->beginRegion().isValid()) {
319 applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->beginRegion());
320 }
321
322 if (rule->isLookAhead()) {
323 Q_ASSERT(!rule->context().isStay());
324 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
325 isLookAhead = true;
326 break;
327 }
328
329 d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData);
330 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
331 if (newOffset == text.size() && rule->isLineContinue()) {
332 lineContinuation = true;
333 }
334 break;
335 }
336 if (isLookAhead) {
337 continue;
338 }
339
340 if (newOffset <= offset) { // no matching rule
341 if (stateData->topContext()->fallthrough()) {
342 d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->fallthroughContext(), captures: QStringList(), state&: newState, isSharedData);
343 continue;
344 }
345
346 newOffset = offset + 1;
347 newFormat = &stateData->topContext()->attributeFormat();
348 }
349
350 /**
351 * if we arrive here, some new format has to be set!
352 */
353 Q_ASSERT(newFormat);
354
355 /**
356 * on format change, apply the last one and switch to new one
357 */
358 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
359 if (offset > 0) {
360 applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat);
361 }
362 beginOffset = offset;
363 currentFormat = newFormat;
364 }
365
366 /**
367 * we must have made progress if we arrive here!
368 */
369 Q_ASSERT(newOffset > offset);
370 offset = newOffset;
371
372 } while (offset < text.size());
373
374 /**
375 * apply format for remaining text, if any
376 */
377 if (beginOffset < offset) {
378 applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat);
379 }
380
381 /**
382 * handle line end context switches
383 * guard against endless loops
384 * see https://phabricator.kde.org/D18509
385 */
386 {
387 int endlessLoopingCounter = 0;
388 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
389 if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEndContext(), captures: QStringList(), state&: newState, isSharedData)) {
390 break;
391 }
392
393 // guard against endless loops
394 ++endlessLoopingCounter;
395 if (endlessLoopingCounter > 1024) {
396 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
397 break;
398 }
399 }
400 }
401
402 return *defData->unify.insert(value: newState);
403}
404
405bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
406{
407 const auto popCount = contextSwitch.popCount();
408 const auto context = contextSwitch.context();
409 if (popCount <= 0 && !context) {
410 return true;
411 }
412
413 // a modified state must be detached before modification
414 if (isSharedData) {
415 data = StateData::detach(state);
416 isSharedData = false;
417 }
418
419 // kill as many items as requested from the stack, will always keep the initial context alive!
420 const bool initialContextSurvived = data->pop(popCount);
421
422 // if we have a new context to add, push it
423 // then we always "succeed"
424 if (context) {
425 data->push(context, captures: std::move(captures));
426 return true;
427 }
428
429 // else we abort, if we did try to pop the initial context
430 return initialContextSurvived;
431}
432
433void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
434{
435 Q_UNUSED(offset);
436 Q_UNUSED(length);
437 Q_UNUSED(region);
438}
439

source code of syntax-highlighting/src/lib/abstracthighlighter.cpp