1 | /* |
2 | SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #include "abstracthighlighter.h" |
8 | #include "abstracthighlighter_p.h" |
9 | #include "context_p.h" |
10 | #include "definition_p.h" |
11 | #include "foldingregion.h" |
12 | #include "format.h" |
13 | #include "ksyntaxhighlighting_logging.h" |
14 | #include "repository.h" |
15 | #include "repository_p.h" |
16 | #include "rule_p.h" |
17 | #include "state.h" |
18 | #include "state_p.h" |
19 | #include "theme.h" |
20 | |
21 | using namespace KSyntaxHighlighting; |
22 | |
23 | AbstractHighlighterPrivate::AbstractHighlighterPrivate() |
24 | { |
25 | } |
26 | |
27 | AbstractHighlighterPrivate::~AbstractHighlighterPrivate() |
28 | { |
29 | } |
30 | |
31 | void AbstractHighlighterPrivate::ensureDefinitionLoaded() |
32 | { |
33 | auto defData = DefinitionData::get(def: m_definition); |
34 | if (Q_UNLIKELY(!m_definition.isValid())) { |
35 | if (defData->repo && !defData->name.isEmpty()) { |
36 | qCDebug(Log) << "Definition became invalid, trying re-lookup." ; |
37 | m_definition = defData->repo->definitionForName(defName: defData->name); |
38 | defData = DefinitionData::get(def: m_definition); |
39 | } |
40 | |
41 | if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) { |
42 | qCCritical(Log) << "Repository got deleted while a highlighter is still active!" ; |
43 | } |
44 | } |
45 | |
46 | if (m_definition.isValid()) { |
47 | defData->load(); |
48 | } |
49 | } |
50 | |
51 | AbstractHighlighter::AbstractHighlighter() |
52 | : d_ptr(new AbstractHighlighterPrivate) |
53 | { |
54 | } |
55 | |
56 | AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd) |
57 | : d_ptr(dd) |
58 | { |
59 | } |
60 | |
61 | AbstractHighlighter::~AbstractHighlighter() |
62 | { |
63 | delete d_ptr; |
64 | } |
65 | |
66 | Definition AbstractHighlighter::definition() const |
67 | { |
68 | return d_ptr->m_definition; |
69 | } |
70 | |
71 | void AbstractHighlighter::setDefinition(const Definition &def) |
72 | { |
73 | Q_D(AbstractHighlighter); |
74 | d->m_definition = def; |
75 | } |
76 | |
77 | Theme AbstractHighlighter::theme() const |
78 | { |
79 | Q_D(const AbstractHighlighter); |
80 | return d->m_theme; |
81 | } |
82 | |
83 | void AbstractHighlighter::setTheme(const Theme &theme) |
84 | { |
85 | Q_D(AbstractHighlighter); |
86 | d->m_theme = theme; |
87 | } |
88 | |
89 | /** |
90 | * Returns the index of the first non-space character. If the line is empty, |
91 | * or only contains white spaces, text.size() is returned. |
92 | */ |
93 | static inline int firstNonSpaceChar(QStringView text) |
94 | { |
95 | for (int i = 0; i < text.length(); ++i) { |
96 | if (!text[i].isSpace()) { |
97 | return i; |
98 | } |
99 | } |
100 | return text.size(); |
101 | } |
102 | |
103 | State AbstractHighlighter::highlightLine(QStringView text, const State &state) |
104 | { |
105 | Q_D(AbstractHighlighter); |
106 | |
107 | // verify definition, deal with no highlighting being enabled |
108 | d->ensureDefinitionLoaded(); |
109 | const auto defData = DefinitionData::get(def: d->m_definition); |
110 | if (!d->m_definition.isValid() || !defData->isLoaded()) { |
111 | applyFormat(offset: 0, length: text.size(), format: Format()); |
112 | return State(); |
113 | } |
114 | |
115 | // limit the cache for unification to some reasonable size |
116 | // we use here at the moment 64k elements to not hog too much memory |
117 | // and to make the clearing no big stall |
118 | if (defData->unify.size() > 64 * 1024) |
119 | defData->unify.clear(); |
120 | |
121 | // verify/initialize state |
122 | auto newState = state; |
123 | auto stateData = StateData::get(state: newState); |
124 | bool isSharedData = true; |
125 | if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) { |
126 | qCDebug(Log) << "Got invalid state, resetting." ; |
127 | stateData = nullptr; |
128 | } |
129 | if (Q_UNLIKELY(!stateData)) { |
130 | stateData = StateData::reset(state&: newState); |
131 | stateData->push(context: defData->initialContext(), captures: QStringList()); |
132 | stateData->m_defId = defData->id; |
133 | isSharedData = false; |
134 | } |
135 | |
136 | // process empty lines |
137 | if (Q_UNLIKELY(text.isEmpty())) { |
138 | /** |
139 | * handle line empty context switches |
140 | * guard against endless loops |
141 | * see https://phabricator.kde.org/D18509 |
142 | */ |
143 | int endlessLoopingCounter = 0; |
144 | while (!stateData->topContext()->lineEmptyContext().isStay()) { |
145 | /** |
146 | * line empty context switches |
147 | */ |
148 | if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEmptyContext(), captures: QStringList(), state&: newState, isSharedData)) { |
149 | /** |
150 | * end when trying to #pop the main context |
151 | */ |
152 | break; |
153 | } |
154 | |
155 | if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) { |
156 | break; |
157 | } |
158 | |
159 | // guard against endless loops |
160 | ++endlessLoopingCounter; |
161 | if (endlessLoopingCounter > 1024) { |
162 | qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line." ; |
163 | break; |
164 | } |
165 | } |
166 | auto context = stateData->topContext(); |
167 | applyFormat(offset: 0, length: 0, format: context->attributeFormat()); |
168 | return *defData->unify.insert(value: newState); |
169 | } |
170 | |
171 | auto &dynamicRegexpCache = RepositoryPrivate::get(repo: defData->repo)->m_dynamicRegexpCache; |
172 | |
173 | int offset = 0; |
174 | int beginOffset = 0; |
175 | bool lineContinuation = false; |
176 | |
177 | /** |
178 | * for expensive rules like regexes we do: |
179 | * - match them for the complete line, as this is faster than re-trying them at all positions |
180 | * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use |
181 | * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed |
182 | */ |
183 | QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets; |
184 | QStringList capturesForLastDynamicSkipOffset; |
185 | |
186 | auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int { |
187 | auto i = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) { |
188 | return v.first == r; |
189 | }); |
190 | if (i == skipOffsets.end()) |
191 | return 0; |
192 | return i->second; |
193 | }; |
194 | |
195 | auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) { |
196 | auto it = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) { |
197 | return v.first == r; |
198 | }); |
199 | if (it == skipOffsets.end()) { |
200 | skipOffsets.push_back(t: {r, i}); |
201 | } else { |
202 | it->second = i; |
203 | } |
204 | }; |
205 | |
206 | /** |
207 | * current active format |
208 | * stored as pointer to avoid deconstruction/constructions inside the internal loop |
209 | * the pointers are stable, the formats are either in the contexts or rules |
210 | */ |
211 | auto currentFormat = &stateData->topContext()->attributeFormat(); |
212 | |
213 | /** |
214 | * cached first non-space character, needs to be computed if < 0 |
215 | */ |
216 | int firstNonSpace = -1; |
217 | int lastOffset = offset; |
218 | int endlessLoopingCounter = 0; |
219 | do { |
220 | /** |
221 | * avoid that we loop endless for some broken hl definitions |
222 | */ |
223 | if (lastOffset == offset) { |
224 | ++endlessLoopingCounter; |
225 | if (endlessLoopingCounter > 1024) { |
226 | qCDebug(Log) << "Endless state transitions, aborting highlighting of line." ; |
227 | break; |
228 | } |
229 | } else { |
230 | // ensure we made progress, clear the endlessLoopingCounter |
231 | Q_ASSERT(offset > lastOffset); |
232 | lastOffset = offset; |
233 | endlessLoopingCounter = 0; |
234 | } |
235 | |
236 | /** |
237 | * try to match all rules in the context in order of declaration in XML |
238 | */ |
239 | bool isLookAhead = false; |
240 | int newOffset = 0; |
241 | const Format *newFormat = nullptr; |
242 | for (const auto &ruleShared : stateData->topContext()->rules()) { |
243 | auto rule = ruleShared.get(); |
244 | /** |
245 | * filter out rules that require a specific column |
246 | */ |
247 | if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) { |
248 | continue; |
249 | } |
250 | |
251 | /** |
252 | * filter out rules that only match for leading whitespace |
253 | */ |
254 | if (rule->firstNonSpace()) { |
255 | /** |
256 | * compute the first non-space lazy |
257 | * avoids computing it for contexts without any such rules |
258 | */ |
259 | if (firstNonSpace < 0) { |
260 | firstNonSpace = firstNonSpaceChar(text); |
261 | } |
262 | |
263 | /** |
264 | * can we skip? |
265 | */ |
266 | if (offset > firstNonSpace) { |
267 | continue; |
268 | } |
269 | } |
270 | |
271 | int currentSkipOffset = 0; |
272 | if (Q_UNLIKELY(rule->hasSkipOffset())) { |
273 | /** |
274 | * shall we skip application of this rule? two cases: |
275 | * - rule can't match at all => currentSkipOffset < 0 |
276 | * - rule will only match for some higher offset => currentSkipOffset > offset |
277 | * |
278 | * we need to invalidate this if we are dynamic and have different captures then last time |
279 | */ |
280 | if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) { |
281 | skipOffsets.clear(); |
282 | } else { |
283 | currentSkipOffset = getSkipOffsetValue(rule); |
284 | if (currentSkipOffset < 0 || currentSkipOffset > offset) { |
285 | continue; |
286 | } |
287 | } |
288 | } |
289 | |
290 | auto newResult = rule->doMatch(text, offset, captures: stateData->topCaptures(), dynamicRegexpCache); |
291 | newOffset = newResult.offset(); |
292 | |
293 | /** |
294 | * update skip offset if new one rules out any later match or is larger than current one |
295 | */ |
296 | if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) { |
297 | insertSkipOffset(rule, newResult.skipOffset()); |
298 | |
299 | // remember new captures, if dynamic to enforce proper reset above on change! |
300 | if (rule->isDynamic()) { |
301 | capturesForLastDynamicSkipOffset = stateData->topCaptures(); |
302 | } |
303 | } |
304 | |
305 | if (newOffset <= offset) { |
306 | continue; |
307 | } |
308 | |
309 | /** |
310 | * apply folding. |
311 | * special cases: |
312 | * - rule with endRegion + beginRegion: in endRegion, the length is 0 |
313 | * - rule with lookAhead: length is 0 |
314 | */ |
315 | if (rule->endRegion().isValid() && rule->beginRegion().isValid()) { |
316 | applyFolding(offset, length: 0, region: rule->endRegion()); |
317 | } else if (rule->endRegion().isValid()) { |
318 | applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->endRegion()); |
319 | } |
320 | if (rule->beginRegion().isValid()) { |
321 | applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->beginRegion()); |
322 | } |
323 | |
324 | if (rule->isLookAhead()) { |
325 | Q_ASSERT(!rule->context().isStay()); |
326 | d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData); |
327 | isLookAhead = true; |
328 | break; |
329 | } |
330 | |
331 | d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData); |
332 | newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat(); |
333 | if (newOffset == text.size() && rule->isLineContinue()) { |
334 | lineContinuation = true; |
335 | } |
336 | break; |
337 | } |
338 | if (isLookAhead) { |
339 | continue; |
340 | } |
341 | |
342 | if (newOffset <= offset) { // no matching rule |
343 | if (stateData->topContext()->fallthrough()) { |
344 | d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->fallthroughContext(), captures: QStringList(), state&: newState, isSharedData); |
345 | continue; |
346 | } |
347 | |
348 | newOffset = offset + 1; |
349 | newFormat = &stateData->topContext()->attributeFormat(); |
350 | } |
351 | |
352 | /** |
353 | * if we arrive here, some new format has to be set! |
354 | */ |
355 | Q_ASSERT(newFormat); |
356 | |
357 | /** |
358 | * on format change, apply the last one and switch to new one |
359 | */ |
360 | if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) { |
361 | if (offset > 0) { |
362 | applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat); |
363 | } |
364 | beginOffset = offset; |
365 | currentFormat = newFormat; |
366 | } |
367 | |
368 | /** |
369 | * we must have made progress if we arrive here! |
370 | */ |
371 | Q_ASSERT(newOffset > offset); |
372 | offset = newOffset; |
373 | |
374 | } while (offset < text.size()); |
375 | |
376 | /** |
377 | * apply format for remaining text, if any |
378 | */ |
379 | if (beginOffset < offset) { |
380 | applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat); |
381 | } |
382 | |
383 | /** |
384 | * handle line end context switches |
385 | * guard against endless loops |
386 | * see https://phabricator.kde.org/D18509 |
387 | */ |
388 | { |
389 | int endlessLoopingCounter = 0; |
390 | while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) { |
391 | if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEndContext(), captures: QStringList(), state&: newState, isSharedData)) { |
392 | break; |
393 | } |
394 | |
395 | // guard against endless loops |
396 | ++endlessLoopingCounter; |
397 | if (endlessLoopingCounter > 1024) { |
398 | qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line." ; |
399 | break; |
400 | } |
401 | } |
402 | } |
403 | |
404 | return *defData->unify.insert(value: newState); |
405 | } |
406 | |
407 | bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData) |
408 | { |
409 | const auto popCount = contextSwitch.popCount(); |
410 | const auto context = contextSwitch.context(); |
411 | if (popCount <= 0 && !context) { |
412 | return true; |
413 | } |
414 | |
415 | // a modified state must be detached before modification |
416 | if (isSharedData) { |
417 | data = StateData::detach(state); |
418 | isSharedData = false; |
419 | } |
420 | |
421 | // kill as many items as requested from the stack, will always keep the initial context alive! |
422 | const bool initialContextSurvived = data->pop(popCount); |
423 | |
424 | // if we have a new context to add, push it |
425 | // then we always "succeed" |
426 | if (context) { |
427 | data->push(context, captures: std::move(captures)); |
428 | return true; |
429 | } |
430 | |
431 | // else we abort, if we did try to pop the initial context |
432 | return initialContextSurvived; |
433 | } |
434 | |
435 | void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region) |
436 | { |
437 | Q_UNUSED(offset); |
438 | Q_UNUSED(length); |
439 | Q_UNUSED(region); |
440 | } |
441 | |