1 | /* |
2 | SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #include "abstracthighlighter.h" |
8 | #include "abstracthighlighter_p.h" |
9 | #include "context_p.h" |
10 | #include "definition_p.h" |
11 | #include "foldingregion.h" |
12 | #include "format.h" |
13 | #include "ksyntaxhighlighting_logging.h" |
14 | #include "repository.h" |
15 | #include "rule_p.h" |
16 | #include "state.h" |
17 | #include "state_p.h" |
18 | #include "theme.h" |
19 | |
20 | using namespace KSyntaxHighlighting; |
21 | |
22 | AbstractHighlighterPrivate::AbstractHighlighterPrivate() |
23 | { |
24 | } |
25 | |
26 | AbstractHighlighterPrivate::~AbstractHighlighterPrivate() |
27 | { |
28 | } |
29 | |
30 | void AbstractHighlighterPrivate::ensureDefinitionLoaded() |
31 | { |
32 | auto defData = DefinitionData::get(def: m_definition); |
33 | if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) { |
34 | qCDebug(Log) << "Definition became invalid, trying re-lookup." ; |
35 | m_definition = defData->repo->definitionForName(m_definition.name()); |
36 | defData = DefinitionData::get(def: m_definition); |
37 | } |
38 | |
39 | if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) { |
40 | qCCritical(Log) << "Repository got deleted while a highlighter is still active!" ; |
41 | } |
42 | |
43 | if (m_definition.isValid()) { |
44 | defData->load(); |
45 | } |
46 | } |
47 | |
48 | AbstractHighlighter::AbstractHighlighter() |
49 | : d_ptr(new AbstractHighlighterPrivate) |
50 | { |
51 | } |
52 | |
53 | AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd) |
54 | : d_ptr(dd) |
55 | { |
56 | } |
57 | |
58 | AbstractHighlighter::~AbstractHighlighter() |
59 | { |
60 | delete d_ptr; |
61 | } |
62 | |
63 | Definition AbstractHighlighter::definition() const |
64 | { |
65 | return d_ptr->m_definition; |
66 | } |
67 | |
68 | void AbstractHighlighter::setDefinition(const Definition &def) |
69 | { |
70 | Q_D(AbstractHighlighter); |
71 | d->m_definition = def; |
72 | } |
73 | |
74 | Theme AbstractHighlighter::theme() const |
75 | { |
76 | Q_D(const AbstractHighlighter); |
77 | return d->m_theme; |
78 | } |
79 | |
80 | void AbstractHighlighter::setTheme(const Theme &theme) |
81 | { |
82 | Q_D(AbstractHighlighter); |
83 | d->m_theme = theme; |
84 | } |
85 | |
86 | /** |
87 | * Returns the index of the first non-space character. If the line is empty, |
88 | * or only contains white spaces, text.size() is returned. |
89 | */ |
90 | static inline int firstNonSpaceChar(QStringView text) |
91 | { |
92 | for (int i = 0; i < text.length(); ++i) { |
93 | if (!text[i].isSpace()) { |
94 | return i; |
95 | } |
96 | } |
97 | return text.size(); |
98 | } |
99 | |
100 | State AbstractHighlighter::highlightLine(QStringView text, const State &state) |
101 | { |
102 | Q_D(AbstractHighlighter); |
103 | |
104 | // verify definition, deal with no highlighting being enabled |
105 | d->ensureDefinitionLoaded(); |
106 | const auto defData = DefinitionData::get(d->m_definition); |
107 | if (!d->m_definition.isValid() || !defData->isLoaded()) { |
108 | applyFormat(offset: 0, length: text.size(), format: Format()); |
109 | return State(); |
110 | } |
111 | |
112 | // limit the cache for unification to some reasonable size |
113 | // we use here at the moment 64k elements to not hog too much memory |
114 | // and to make the clearing no big stall |
115 | if (defData->unify.size() > 64 * 1024) |
116 | defData->unify.clear(); |
117 | |
118 | // verify/initialize state |
119 | auto newState = state; |
120 | auto stateData = StateData::get(state: newState); |
121 | bool isSharedData = true; |
122 | if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) { |
123 | qCDebug(Log) << "Got invalid state, resetting." ; |
124 | stateData = nullptr; |
125 | } |
126 | if (Q_UNLIKELY(!stateData)) { |
127 | stateData = StateData::reset(state&: newState); |
128 | stateData->push(defData->initialContext(), QStringList()); |
129 | stateData->m_defId = defData->id; |
130 | isSharedData = false; |
131 | } |
132 | |
133 | // process empty lines |
134 | if (Q_UNLIKELY(text.isEmpty())) { |
135 | /** |
136 | * handle line empty context switches |
137 | * guard against endless loops |
138 | * see https://phabricator.kde.org/D18509 |
139 | */ |
140 | int endlessLoopingCounter = 0; |
141 | while (!stateData->topContext()->lineEmptyContext().isStay()) { |
142 | /** |
143 | * line empty context switches |
144 | */ |
145 | if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) { |
146 | /** |
147 | * end when trying to #pop the main context |
148 | */ |
149 | break; |
150 | } |
151 | |
152 | if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) { |
153 | break; |
154 | } |
155 | |
156 | // guard against endless loops |
157 | ++endlessLoopingCounter; |
158 | if (endlessLoopingCounter > 1024) { |
159 | qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line." ; |
160 | break; |
161 | } |
162 | } |
163 | auto context = stateData->topContext(); |
164 | applyFormat(offset: 0, length: 0, format: context->attributeFormat()); |
165 | return *defData->unify.insert(newState); |
166 | } |
167 | |
168 | int offset = 0; |
169 | int beginOffset = 0; |
170 | bool lineContinuation = false; |
171 | |
172 | /** |
173 | * for expensive rules like regexes we do: |
174 | * - match them for the complete line, as this is faster than re-trying them at all positions |
175 | * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use |
176 | * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed |
177 | */ |
178 | QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets; |
179 | QStringList capturesForLastDynamicSkipOffset; |
180 | |
181 | auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int { |
182 | auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) { |
183 | return v.first == r; |
184 | }); |
185 | if (i == skipOffsets.end()) |
186 | return 0; |
187 | return i->second; |
188 | }; |
189 | |
190 | auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) { |
191 | auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) { |
192 | return v.first == r; |
193 | }); |
194 | if (it == skipOffsets.end()) { |
195 | skipOffsets.push_back({r, i}); |
196 | } else { |
197 | it->second = i; |
198 | } |
199 | }; |
200 | |
201 | /** |
202 | * current active format |
203 | * stored as pointer to avoid deconstruction/constructions inside the internal loop |
204 | * the pointers are stable, the formats are either in the contexts or rules |
205 | */ |
206 | auto currentFormat = &stateData->topContext()->attributeFormat(); |
207 | |
208 | /** |
209 | * cached first non-space character, needs to be computed if < 0 |
210 | */ |
211 | int firstNonSpace = -1; |
212 | int lastOffset = offset; |
213 | int endlessLoopingCounter = 0; |
214 | do { |
215 | /** |
216 | * avoid that we loop endless for some broken hl definitions |
217 | */ |
218 | if (lastOffset == offset) { |
219 | ++endlessLoopingCounter; |
220 | if (endlessLoopingCounter > 1024) { |
221 | qCDebug(Log) << "Endless state transitions, aborting highlighting of line." ; |
222 | break; |
223 | } |
224 | } else { |
225 | // ensure we made progress, clear the endlessLoopingCounter |
226 | Q_ASSERT(offset > lastOffset); |
227 | lastOffset = offset; |
228 | endlessLoopingCounter = 0; |
229 | } |
230 | |
231 | /** |
232 | * try to match all rules in the context in order of declaration in XML |
233 | */ |
234 | bool isLookAhead = false; |
235 | int newOffset = 0; |
236 | const Format *newFormat = nullptr; |
237 | for (const auto &rule : stateData->topContext()->rules()) { |
238 | /** |
239 | * filter out rules that require a specific column |
240 | */ |
241 | if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) { |
242 | continue; |
243 | } |
244 | |
245 | /** |
246 | * filter out rules that only match for leading whitespace |
247 | */ |
248 | if (rule->firstNonSpace()) { |
249 | /** |
250 | * compute the first non-space lazy |
251 | * avoids computing it for contexts without any such rules |
252 | */ |
253 | if (firstNonSpace < 0) { |
254 | firstNonSpace = firstNonSpaceChar(text); |
255 | } |
256 | |
257 | /** |
258 | * can we skip? |
259 | */ |
260 | if (offset > firstNonSpace) { |
261 | continue; |
262 | } |
263 | } |
264 | |
265 | int currentSkipOffset = 0; |
266 | if (Q_UNLIKELY(rule->hasSkipOffset())) { |
267 | /** |
268 | * shall we skip application of this rule? two cases: |
269 | * - rule can't match at all => currentSkipOffset < 0 |
270 | * - rule will only match for some higher offset => currentSkipOffset > offset |
271 | * |
272 | * we need to invalidate this if we are dynamic and have different captures then last time |
273 | */ |
274 | if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) { |
275 | skipOffsets.clear(); |
276 | } else { |
277 | currentSkipOffset = getSkipOffsetValue(rule.get()); |
278 | if (currentSkipOffset < 0 || currentSkipOffset > offset) { |
279 | continue; |
280 | } |
281 | } |
282 | } |
283 | |
284 | auto newResult = rule->doMatch(text, offset, stateData->topCaptures()); |
285 | newOffset = newResult.offset(); |
286 | |
287 | /** |
288 | * update skip offset if new one rules out any later match or is larger than current one |
289 | */ |
290 | if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) { |
291 | insertSkipOffset(rule.get(), newResult.skipOffset()); |
292 | |
293 | // remember new captures, if dynamic to enforce proper reset above on change! |
294 | if (rule->isDynamic()) { |
295 | capturesForLastDynamicSkipOffset = stateData->topCaptures(); |
296 | } |
297 | } |
298 | |
299 | if (newOffset <= offset) { |
300 | continue; |
301 | } |
302 | |
303 | /** |
304 | * apply folding. |
305 | * special cases: |
306 | * - rule with endRegion + beginRegion: in endRegion, the length is 0 |
307 | * - rule with lookAhead: length is 0 |
308 | */ |
309 | if (rule->endRegion().isValid() && rule->beginRegion().isValid()) { |
310 | applyFolding(offset, 0, rule->endRegion()); |
311 | } else if (rule->endRegion().isValid()) { |
312 | applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion()); |
313 | } |
314 | if (rule->beginRegion().isValid()) { |
315 | applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion()); |
316 | } |
317 | |
318 | if (rule->isLookAhead()) { |
319 | Q_ASSERT(!rule->context().isStay()); |
320 | d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData); |
321 | isLookAhead = true; |
322 | break; |
323 | } |
324 | |
325 | d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData); |
326 | newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat(); |
327 | if (newOffset == text.size() && rule->isLineContinue()) { |
328 | lineContinuation = true; |
329 | } |
330 | break; |
331 | } |
332 | if (isLookAhead) { |
333 | continue; |
334 | } |
335 | |
336 | if (newOffset <= offset) { // no matching rule |
337 | if (stateData->topContext()->fallthrough()) { |
338 | d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData); |
339 | continue; |
340 | } |
341 | |
342 | newOffset = offset + 1; |
343 | newFormat = &stateData->topContext()->attributeFormat(); |
344 | } |
345 | |
346 | /** |
347 | * if we arrive here, some new format has to be set! |
348 | */ |
349 | Q_ASSERT(newFormat); |
350 | |
351 | /** |
352 | * on format change, apply the last one and switch to new one |
353 | */ |
354 | if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) { |
355 | if (offset > 0) { |
356 | applyFormat(offset: beginOffset, length: offset - beginOffset, format: *currentFormat); |
357 | } |
358 | beginOffset = offset; |
359 | currentFormat = newFormat; |
360 | } |
361 | |
362 | /** |
363 | * we must have made progress if we arrive here! |
364 | */ |
365 | Q_ASSERT(newOffset > offset); |
366 | offset = newOffset; |
367 | |
368 | } while (offset < text.size()); |
369 | |
370 | /** |
371 | * apply format for remaining text, if any |
372 | */ |
373 | if (beginOffset < offset) { |
374 | applyFormat(offset: beginOffset, length: text.size() - beginOffset, format: *currentFormat); |
375 | } |
376 | |
377 | /** |
378 | * handle line end context switches |
379 | * guard against endless loops |
380 | * see https://phabricator.kde.org/D18509 |
381 | */ |
382 | { |
383 | int endlessLoopingCounter = 0; |
384 | while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) { |
385 | if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) { |
386 | break; |
387 | } |
388 | |
389 | // guard against endless loops |
390 | ++endlessLoopingCounter; |
391 | if (endlessLoopingCounter > 1024) { |
392 | qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line." ; |
393 | break; |
394 | } |
395 | } |
396 | } |
397 | |
398 | return *defData->unify.insert(newState); |
399 | } |
400 | |
401 | bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData) |
402 | { |
403 | const auto popCount = contextSwitch.popCount(); |
404 | const auto context = contextSwitch.context(); |
405 | if (popCount <= 0 && !context) { |
406 | return true; |
407 | } |
408 | |
409 | // a modified state must be detached before modification |
410 | if (isSharedData) { |
411 | data = StateData::detach(state); |
412 | isSharedData = false; |
413 | } |
414 | |
415 | // kill as many items as requested from the stack, will always keep the initial context alive! |
416 | const bool initialContextSurvived = data->pop(popCount); |
417 | |
418 | // if we have a new context to add, push it |
419 | // then we always "succeed" |
420 | if (context) { |
421 | data->push(context, std::move(captures)); |
422 | return true; |
423 | } |
424 | |
425 | // else we abort, if we did try to pop the initial context |
426 | return initialContextSurvived; |
427 | } |
428 | |
429 | void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region) |
430 | { |
431 | Q_UNUSED(offset); |
432 | Q_UNUSED(length); |
433 | Q_UNUSED(region); |
434 | } |
435 | |