1 | /* |
2 | SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #include "abstracthighlighter.h" |
8 | #include "abstracthighlighter_p.h" |
9 | #include "context_p.h" |
10 | #include "definition_p.h" |
11 | #include "foldingregion.h" |
12 | #include "format.h" |
13 | #include "ksyntaxhighlighting_logging.h" |
14 | #include "repository.h" |
15 | #include "repository_p.h" |
16 | #include "rule_p.h" |
17 | #include "state.h" |
18 | #include "state_p.h" |
19 | #include "theme.h" |
20 | |
21 | using namespace KSyntaxHighlighting; |
22 | |
23 | AbstractHighlighterPrivate::AbstractHighlighterPrivate() |
24 | { |
25 | } |
26 | |
27 | AbstractHighlighterPrivate::~AbstractHighlighterPrivate() |
28 | { |
29 | } |
30 | |
31 | void AbstractHighlighterPrivate::ensureDefinitionLoaded() |
32 | { |
33 | auto defData = DefinitionData::get(def: m_definition); |
34 | if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) { |
35 | qCDebug(Log) << "Definition became invalid, trying re-lookup." ; |
36 | m_definition = defData->repo->definitionForName(defName: m_definition.name()); |
37 | defData = DefinitionData::get(def: m_definition); |
38 | } |
39 | |
40 | if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) { |
41 | qCCritical(Log) << "Repository got deleted while a highlighter is still active!" ; |
42 | } |
43 | |
44 | if (m_definition.isValid()) { |
45 | defData->load(); |
46 | } |
47 | } |
48 | |
49 | AbstractHighlighter::AbstractHighlighter() |
50 | : d_ptr(new AbstractHighlighterPrivate) |
51 | { |
52 | } |
53 | |
54 | AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd) |
55 | : d_ptr(dd) |
56 | { |
57 | } |
58 | |
59 | AbstractHighlighter::~AbstractHighlighter() |
60 | { |
61 | delete d_ptr; |
62 | } |
63 | |
64 | Definition AbstractHighlighter::definition() const |
65 | { |
66 | return d_ptr->m_definition; |
67 | } |
68 | |
69 | void AbstractHighlighter::setDefinition(const Definition &def) |
70 | { |
71 | Q_D(AbstractHighlighter); |
72 | d->m_definition = def; |
73 | } |
74 | |
75 | Theme AbstractHighlighter::theme() const |
76 | { |
77 | Q_D(const AbstractHighlighter); |
78 | return d->m_theme; |
79 | } |
80 | |
81 | void AbstractHighlighter::setTheme(const Theme &theme) |
82 | { |
83 | Q_D(AbstractHighlighter); |
84 | d->m_theme = theme; |
85 | } |
86 | |
87 | /** |
88 | * Returns the index of the first non-space character. If the line is empty, |
89 | * or only contains white spaces, text.size() is returned. |
90 | */ |
91 | static inline int firstNonSpaceChar(QStringView text) |
92 | { |
93 | for (int i = 0; i < text.length(); ++i) { |
94 | if (!text[i].isSpace()) { |
95 | return i; |
96 | } |
97 | } |
98 | return text.size(); |
99 | } |
100 | |
101 | State AbstractHighlighter::highlightLine(QStringView text, const State &state) |
102 | { |
103 | Q_D(AbstractHighlighter); |
104 | |
105 | // verify definition, deal with no highlighting being enabled |
106 | d->ensureDefinitionLoaded(); |
107 | const auto defData = DefinitionData::get(def: d->m_definition); |
108 | if (!d->m_definition.isValid() || !defData->isLoaded()) { |
109 | applyFormat(offset: 0, length: text.size(), format: Format()); |
110 | return State(); |
111 | } |
112 | |
113 | // limit the cache for unification to some reasonable size |
114 | // we use here at the moment 64k elements to not hog too much memory |
115 | // and to make the clearing no big stall |
116 | if (defData->unify.size() > 64 * 1024) |
117 | defData->unify.clear(); |
118 | |
119 | // verify/initialize state |
120 | auto newState = state; |
121 | auto stateData = StateData::get(state: newState); |
122 | bool isSharedData = true; |
123 | if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) { |
124 | qCDebug(Log) << "Got invalid state, resetting." ; |
125 | stateData = nullptr; |
126 | } |
127 | if (Q_UNLIKELY(!stateData)) { |
128 | stateData = StateData::reset(state&: newState); |
129 | stateData->push(context: defData->initialContext(), captures: QStringList()); |
130 | stateData->m_defId = defData->id; |
131 | isSharedData = false; |
132 | } |
133 | |
134 | // process empty lines |
135 | if (Q_UNLIKELY(text.isEmpty())) { |
136 | /** |
137 | * handle line empty context switches |
138 | * guard against endless loops |
139 | * see https://phabricator.kde.org/D18509 |
140 | */ |
141 | int endlessLoopingCounter = 0; |
142 | while (!stateData->topContext()->lineEmptyContext().isStay()) { |
143 | /** |
144 | * line empty context switches |
145 | */ |
146 | if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEmptyContext(), captures: QStringList(), state&: newState, isSharedData)) { |
147 | /** |
148 | * end when trying to #pop the main context |
149 | */ |
150 | break; |
151 | } |
152 | |
153 | if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) { |
154 | break; |
155 | } |
156 | |
157 | // guard against endless loops |
158 | ++endlessLoopingCounter; |
159 | if (endlessLoopingCounter > 1024) { |
160 | qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line." ; |
161 | break; |
162 | } |
163 | } |
164 | auto context = stateData->topContext(); |
165 | applyFormat(offset: 0, length: 0, format: context->attributeFormat()); |
166 | return *defData->unify.insert(value: newState); |
167 | } |
168 | |
169 | auto &dynamicRegexpCache = RepositoryPrivate::get(repo: defData->repo)->m_dynamicRegexpCache; |
170 | |
171 | int offset = 0; |
172 | int beginOffset = 0; |
173 | bool lineContinuation = false; |
174 | |
175 | /** |
176 | * for expensive rules like regexes we do: |
177 | * - match them for the complete line, as this is faster than re-trying them at all positions |
178 | * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use |
179 | * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed |
180 | */ |
181 | QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets; |
182 | QStringList capturesForLastDynamicSkipOffset; |
183 | |
184 | auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int { |
185 | auto i = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) { |
186 | return v.first == r; |
187 | }); |
188 | if (i == skipOffsets.end()) |
189 | return 0; |
190 | return i->second; |
191 | }; |
192 | |
193 | auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) { |
194 | auto it = std::find_if(first: skipOffsets.begin(), last: skipOffsets.end(), pred: [r](const auto &v) { |
195 | return v.first == r; |
196 | }); |
197 | if (it == skipOffsets.end()) { |
198 | skipOffsets.push_back(t: {r, i}); |
199 | } else { |
200 | it->second = i; |
201 | } |
202 | }; |
203 | |
204 | /** |
205 | * current active format |
206 | * stored as pointer to avoid deconstruction/constructions inside the internal loop |
207 | * the pointers are stable, the formats are either in the contexts or rules |
208 | */ |
209 | auto currentFormat = &stateData->topContext()->attributeFormat(); |
210 | |
211 | /** |
212 | * cached first non-space character, needs to be computed if < 0 |
213 | */ |
214 | int firstNonSpace = -1; |
215 | int lastOffset = offset; |
216 | int endlessLoopingCounter = 0; |
217 | do { |
218 | /** |
219 | * avoid that we loop endless for some broken hl definitions |
220 | */ |
221 | if (lastOffset == offset) { |
222 | ++endlessLoopingCounter; |
223 | if (endlessLoopingCounter > 1024) { |
224 | qCDebug(Log) << "Endless state transitions, aborting highlighting of line." ; |
225 | break; |
226 | } |
227 | } else { |
228 | // ensure we made progress, clear the endlessLoopingCounter |
229 | Q_ASSERT(offset > lastOffset); |
230 | lastOffset = offset; |
231 | endlessLoopingCounter = 0; |
232 | } |
233 | |
234 | /** |
235 | * try to match all rules in the context in order of declaration in XML |
236 | */ |
237 | bool isLookAhead = false; |
238 | int newOffset = 0; |
239 | const Format *newFormat = nullptr; |
240 | for (const auto &ruleShared : stateData->topContext()->rules()) { |
241 | auto rule = ruleShared.get(); |
242 | /** |
243 | * filter out rules that require a specific column |
244 | */ |
245 | if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) { |
246 | continue; |
247 | } |
248 | |
249 | /** |
250 | * filter out rules that only match for leading whitespace |
251 | */ |
252 | if (rule->firstNonSpace()) { |
253 | /** |
254 | * compute the first non-space lazy |
255 | * avoids computing it for contexts without any such rules |
256 | */ |
257 | if (firstNonSpace < 0) { |
258 | firstNonSpace = firstNonSpaceChar(text); |
259 | } |
260 | |
261 | /** |
262 | * can we skip? |
263 | */ |
264 | if (offset > firstNonSpace) { |
265 | continue; |
266 | } |
267 | } |
268 | |
269 | int currentSkipOffset = 0; |
270 | if (Q_UNLIKELY(rule->hasSkipOffset())) { |
271 | /** |
272 | * shall we skip application of this rule? two cases: |
273 | * - rule can't match at all => currentSkipOffset < 0 |
274 | * - rule will only match for some higher offset => currentSkipOffset > offset |
275 | * |
276 | * we need to invalidate this if we are dynamic and have different captures then last time |
277 | */ |
278 | if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) { |
279 | skipOffsets.clear(); |
280 | } else { |
281 | currentSkipOffset = getSkipOffsetValue(rule); |
282 | if (currentSkipOffset < 0 || currentSkipOffset > offset) { |
283 | continue; |
284 | } |
285 | } |
286 | } |
287 | |
288 | auto newResult = rule->doMatch(text, offset, captures: stateData->topCaptures(), dynamicRegexpCache); |
289 | newOffset = newResult.offset(); |
290 | |
291 | /** |
292 | * update skip offset if new one rules out any later match or is larger than current one |
293 | */ |
294 | if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) { |
295 | insertSkipOffset(rule, newResult.skipOffset()); |
296 | |
297 | // remember new captures, if dynamic to enforce proper reset above on change! |
298 | if (rule->isDynamic()) { |
299 | capturesForLastDynamicSkipOffset = stateData->topCaptures(); |
300 | } |
301 | } |
302 | |
303 | if (newOffset <= offset) { |
304 | continue; |
305 | } |
306 | |
307 | /** |
308 | * apply folding. |
309 | * special cases: |
310 | * - rule with endRegion + beginRegion: in endRegion, the length is 0 |
311 | * - rule with lookAhead: length is 0 |
312 | */ |
313 | if (rule->endRegion().isValid() && rule->beginRegion().isValid()) { |
314 | applyFolding(offset, length: 0, region: rule->endRegion()); |
315 | } else if (rule->endRegion().isValid()) { |
316 | applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->endRegion()); |
317 | } |
318 | if (rule->beginRegion().isValid()) { |
319 | applyFolding(offset, length: rule->isLookAhead() ? 0 : newOffset - offset, region: rule->beginRegion()); |
320 | } |
321 | |
322 | if (rule->isLookAhead()) { |
323 | Q_ASSERT(!rule->context().isStay()); |
324 | d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData); |
325 | isLookAhead = true; |
326 | break; |
327 | } |
328 | |
329 | d->switchContext(data&: stateData, contextSwitch: rule->context(), captures: std::move(newResult.captures()), state&: newState, isSharedData); |
330 | newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat(); |
331 | if (newOffset == text.size() && rule->isLineContinue()) { |
332 | lineContinuation = true; |
333 | } |
334 | break; |
335 | } |
336 | if (isLookAhead) { |
337 | continue; |
338 | } |
339 | |
340 | if (newOffset <= offset) { // no matching rule |
341 | if (stateData->topContext()->fallthrough()) { |
342 | d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->fallthroughContext(), captures: QStringList(), state&: newState, isSharedData); |
343 | continue; |
344 | } |
345 | |
346 | newOffset = offset + 1; |
347 | newFormat = &stateData->topContext()->attributeFormat(); |
348 | } |
349 | |
350 | /** |
351 | * if we arrive here, some new format has to be set! |
352 | */ |
353 | Q_ASSERT(newFormat); |
354 | |
355 | /** |
356 | * on format change, apply the last one and switch to new one |
357 | */ |
358 | if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) { |
359 | if (offset > 0) { |
360 | applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat); |
361 | } |
362 | beginOffset = offset; |
363 | currentFormat = newFormat; |
364 | } |
365 | |
366 | /** |
367 | * we must have made progress if we arrive here! |
368 | */ |
369 | Q_ASSERT(newOffset > offset); |
370 | offset = newOffset; |
371 | |
372 | } while (offset < text.size()); |
373 | |
374 | /** |
375 | * apply format for remaining text, if any |
376 | */ |
377 | if (beginOffset < offset) { |
378 | applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat); |
379 | } |
380 | |
381 | /** |
382 | * handle line end context switches |
383 | * guard against endless loops |
384 | * see https://phabricator.kde.org/D18509 |
385 | */ |
386 | { |
387 | int endlessLoopingCounter = 0; |
388 | while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) { |
389 | if (!d->switchContext(data&: stateData, contextSwitch: stateData->topContext()->lineEndContext(), captures: QStringList(), state&: newState, isSharedData)) { |
390 | break; |
391 | } |
392 | |
393 | // guard against endless loops |
394 | ++endlessLoopingCounter; |
395 | if (endlessLoopingCounter > 1024) { |
396 | qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line." ; |
397 | break; |
398 | } |
399 | } |
400 | } |
401 | |
402 | return *defData->unify.insert(value: newState); |
403 | } |
404 | |
405 | bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData) |
406 | { |
407 | const auto popCount = contextSwitch.popCount(); |
408 | const auto context = contextSwitch.context(); |
409 | if (popCount <= 0 && !context) { |
410 | return true; |
411 | } |
412 | |
413 | // a modified state must be detached before modification |
414 | if (isSharedData) { |
415 | data = StateData::detach(state); |
416 | isSharedData = false; |
417 | } |
418 | |
419 | // kill as many items as requested from the stack, will always keep the initial context alive! |
420 | const bool initialContextSurvived = data->pop(popCount); |
421 | |
422 | // if we have a new context to add, push it |
423 | // then we always "succeed" |
424 | if (context) { |
425 | data->push(context, captures: std::move(captures)); |
426 | return true; |
427 | } |
428 | |
429 | // else we abort, if we did try to pop the initial context |
430 | return initialContextSurvived; |
431 | } |
432 | |
433 | void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region) |
434 | { |
435 | Q_UNUSED(offset); |
436 | Q_UNUSED(length); |
437 | Q_UNUSED(region); |
438 | } |
439 | |