1 | /* |
2 | SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org> |
3 | SPDX-FileCopyrightText: 2018 Christoph Cullmann <cullmann@kde.org> |
4 | SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com> |
5 | |
6 | SPDX-License-Identifier: MIT |
7 | */ |
8 | |
9 | #include "context_p.h" |
10 | #include "definition_p.h" |
11 | #include "ksyntaxhighlighting_logging.h" |
12 | #include "rule_p.h" |
13 | #include "worddelimiters_p.h" |
14 | #include "xml_p.h" |
15 | |
16 | #include <QString> |
17 | |
18 | using namespace KSyntaxHighlighting; |
19 | |
20 | // QChar::isDigit() match any digit in unicode (romain numeral, etc) |
21 | static bool isDigit(QChar c) |
22 | { |
23 | return (c <= QLatin1Char('9') && QLatin1Char('0') <= c); |
24 | } |
25 | |
26 | static bool isOctalChar(QChar c) |
27 | { |
28 | return (c <= QLatin1Char('7') && QLatin1Char('0') <= c); |
29 | } |
30 | |
31 | static bool isHexChar(QChar c) |
32 | { |
33 | return isDigit(c) || (c <= QLatin1Char('f') && QLatin1Char('a') <= c) || (c <= QLatin1Char('F') && QLatin1Char('A') <= c); |
34 | } |
35 | |
36 | static int matchEscapedChar(QStringView text, int offset) |
37 | { |
38 | if (text.at(offset) != QLatin1Char('\\') || text.size() < offset + 2) { |
39 | return offset; |
40 | } |
41 | |
42 | const auto c = text.at(offset + 1); |
43 | switch (c.unicode()) { |
44 | // control chars |
45 | case 'a': |
46 | case 'b': |
47 | case 'e': |
48 | case 'f': |
49 | case 'n': |
50 | case 'r': |
51 | case 't': |
52 | case 'v': |
53 | case '"': |
54 | case '\'': |
55 | case '?': |
56 | case '\\': |
57 | return offset + 2; |
58 | |
59 | // hex encoded character |
60 | case 'x': |
61 | if (offset + 2 < text.size() && isHexChar(text.at(offset + 2))) { |
62 | if (offset + 3 < text.size() && isHexChar(text.at(offset + 3))) { |
63 | return offset + 4; |
64 | } |
65 | return offset + 3; |
66 | } |
67 | return offset; |
68 | |
69 | // octal encoding, simple \0 is OK, too, unlike simple \x above |
70 | case '0': |
71 | case '1': |
72 | case '2': |
73 | case '3': |
74 | case '4': |
75 | case '5': |
76 | case '6': |
77 | case '7': |
78 | if (offset + 2 < text.size() && isOctalChar(text.at(offset + 2))) { |
79 | if (offset + 3 < text.size() && isOctalChar(text.at(offset + 3))) { |
80 | return offset + 4; |
81 | } |
82 | return offset + 3; |
83 | } |
84 | return offset + 2; |
85 | } |
86 | |
87 | return offset; |
88 | } |
89 | |
90 | static QString replaceCaptures(const QString &pattern, const QStringList &captures, bool quote) |
91 | { |
92 | auto result = pattern; |
93 | for (int i = captures.size(); i >= 1; --i) { |
94 | result.replace(QLatin1Char('%') + QString::number(i), quote ? QRegularExpression::escape(captures.at(i - 1)) : captures.at(i - 1)); |
95 | } |
96 | return result; |
97 | } |
98 | |
99 | static MatchResult matchString(QStringView pattern, QStringView text, int offset, Qt::CaseSensitivity caseSensitivity) |
100 | { |
101 | if (offset + pattern.size() <= text.size() && text.mid(offset, pattern.size()).compare(pattern, caseSensitivity) == 0) { |
102 | return offset + pattern.size(); |
103 | } |
104 | return offset; |
105 | } |
106 | |
107 | static void resolveAdditionalWordDelimiters(WordDelimiters &wordDelimiters, const HighlightingContextData::Rule::WordDelimiters &delimiters) |
108 | { |
109 | // cache for DefinitionData::wordDelimiters, is accessed VERY often |
110 | if (!delimiters.additionalDeliminator.isEmpty() || !delimiters.weakDeliminator.isEmpty()) { |
111 | wordDelimiters.append(QStringView(delimiters.additionalDeliminator)); |
112 | wordDelimiters.remove(QStringView(delimiters.weakDeliminator)); |
113 | } |
114 | } |
115 | |
// Out-of-line definition of the compiler-generated destructor.
Rule::~Rule() = default;
117 | |
118 | const IncludeRules *Rule::castToIncludeRules() const |
119 | { |
120 | if (m_type != Type::IncludeRules) { |
121 | return nullptr; |
122 | } |
123 | return static_cast<const IncludeRules *>(this); |
124 | } |
125 | |
126 | bool Rule::resolveCommon(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName) |
127 | { |
128 | switch (ruleData.type) { |
129 | // IncludeRules uses this with a different semantic |
130 | case HighlightingContextData::Rule::Type::IncludeRules: |
131 | m_type = Type::IncludeRules; |
132 | return true; |
133 | case HighlightingContextData::Rule::Type::LineContinue: |
134 | m_type = Type::LineContinue; |
135 | break; |
136 | default: |
137 | m_type = Type::OtherRule; |
138 | break; |
139 | } |
140 | |
141 | /** |
142 | * try to get our format from the definition we stem from |
143 | */ |
144 | if (!ruleData.common.attributeName.isEmpty()) { |
145 | m_attributeFormat = def.formatByName(ruleData.common.attributeName); |
146 | if (!m_attributeFormat.isValid()) { |
147 | qCWarning(Log) << "Rule: Unknown format" << ruleData.common.attributeName << "in context" << lookupContextName << "of definition" << def.name; |
148 | } |
149 | } |
150 | |
151 | m_firstNonSpace = ruleData.common.firstNonSpace; |
152 | m_lookAhead = ruleData.common.lookAhead; |
153 | m_column = ruleData.common.column; |
154 | |
155 | if (!ruleData.common.beginRegionName.isEmpty()) { |
156 | m_beginRegion = FoldingRegion(FoldingRegion::Begin, def.foldingRegionId(ruleData.common.beginRegionName)); |
157 | } |
158 | if (!ruleData.common.endRegionName.isEmpty()) { |
159 | m_endRegion = FoldingRegion(FoldingRegion::End, def.foldingRegionId(ruleData.common.endRegionName)); |
160 | } |
161 | |
162 | m_context.resolve(def, ruleData.common.contextName); |
163 | |
164 | return !(m_lookAhead && m_context.isStay()); |
165 | } |
166 | |
167 | static Rule::Ptr createRule(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName) |
168 | { |
169 | using Type = HighlightingContextData::Rule::Type; |
170 | |
171 | switch (ruleData.type) { |
172 | case Type::AnyChar: |
173 | return std::make_shared<AnyChar>(ruleData.data.anyChar); |
174 | case Type::DetectChar: |
175 | return std::make_shared<DetectChar>(ruleData.data.detectChar); |
176 | case Type::Detect2Chars: |
177 | return std::make_shared<Detect2Chars>(ruleData.data.detect2Chars); |
178 | case Type::IncludeRules: |
179 | return std::make_shared<IncludeRules>(ruleData.data.includeRules); |
180 | case Type::Int: |
181 | return std::make_shared<Int>(def, ruleData.data.detectInt); |
182 | case Type::Keyword: |
183 | return KeywordListRule::create(def, ruleData.data.keyword, lookupContextName); |
184 | case Type::LineContinue: |
185 | return std::make_shared<LineContinue>(ruleData.data.lineContinue); |
186 | case Type::RangeDetect: |
187 | return std::make_shared<RangeDetect>(ruleData.data.rangeDetect); |
188 | case Type::RegExpr: |
189 | if (!ruleData.data.regExpr.dynamic) { |
190 | return std::make_shared<RegExpr>(ruleData.data.regExpr); |
191 | } else { |
192 | return std::make_shared<DynamicRegExpr>(ruleData.data.regExpr); |
193 | } |
194 | case Type::StringDetect: |
195 | if (ruleData.data.stringDetect.dynamic) { |
196 | return std::make_shared<DynamicStringDetect>(ruleData.data.stringDetect); |
197 | } |
198 | return std::make_shared<StringDetect>(ruleData.data.stringDetect); |
199 | case Type::WordDetect: |
200 | return std::make_shared<WordDetect>(def, ruleData.data.wordDetect); |
201 | case Type::Float: |
202 | return std::make_shared<Float>(def, ruleData.data.detectFloat); |
203 | case Type::HlCOct: |
204 | return std::make_shared<HlCOct>(def, ruleData.data.hlCOct); |
205 | case Type::HlCStringChar: |
206 | return std::make_shared<HlCStringChar>(); |
207 | case Type::DetectIdentifier: |
208 | return std::make_shared<DetectIdentifier>(); |
209 | case Type::DetectSpaces: |
210 | return std::make_shared<DetectSpaces>(); |
211 | case Type::HlCChar: |
212 | return std::make_shared<HlCChar>(); |
213 | case Type::HlCHex: |
214 | return std::make_shared<HlCHex>(def, ruleData.data.hlCHex); |
215 | |
216 | case Type::Unknown:; |
217 | } |
218 | |
219 | return Rule::Ptr(nullptr); |
220 | } |
221 | |
222 | Rule::Ptr Rule::create(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName) |
223 | { |
224 | auto rule = createRule(def, ruleData, lookupContextName); |
225 | if (rule && !rule->resolveCommon(def, ruleData, lookupContextName)) { |
226 | rule.reset(); |
227 | } |
228 | return rule; |
229 | } |
230 | |
/**
 * Stores the characters from the parsed rule data; doMatch() tests the
 * current character against them.
 */
AnyChar::AnyChar(const HighlightingContextData::Rule::AnyChar &data)
    : m_chars(data.chars)
{
}
235 | |
236 | MatchResult AnyChar::doMatch(QStringView text, int offset, const QStringList &) const |
237 | { |
238 | if (m_chars.contains(text.at(offset))) { |
239 | return offset + 1; |
240 | } |
241 | return offset; |
242 | } |
243 | |
/**
 * Stores the character to detect.
 *
 * For a dynamic rule the character is read as a digit naming a capture; it is
 * stored as a zero-based index (digit value minus one). For a non-dynamic rule
 * the index is -1.
 * NOTE(review): QChar::digitValue() returns -1 for a non-digit, so a dynamic rule
 * with a non-digit char ends up with index -2.
 */
DetectChar::DetectChar(const HighlightingContextData::Rule::DetectChar &data)
    : m_char(data.char1)
    , m_captureIndex((data.dynamic ? data.char1.digitValue() : 0) - 1)
{
    m_dynamic = data.dynamic;
}
250 | |
251 | MatchResult DetectChar::doMatch(QStringView text, int offset, const QStringList &captures) const |
252 | { |
253 | if (m_dynamic) { |
254 | if (m_captureIndex == -1 || captures.size() <= m_captureIndex || captures.at(m_captureIndex).isEmpty()) { |
255 | return offset; |
256 | } |
257 | if (text.at(offset) == captures.at(m_captureIndex).at(0)) { |
258 | return offset + 1; |
259 | } |
260 | return offset; |
261 | } |
262 | |
263 | if (text.at(offset) == m_char) { |
264 | return offset + 1; |
265 | } |
266 | return offset; |
267 | } |
268 | |
/**
 * Stores the two characters that doMatch() expects back to back.
 */
Detect2Chars::Detect2Chars(const HighlightingContextData::Rule::Detect2Chars &data)
    : m_char1(data.char1)
    , m_char2(data.char2)
{
}
274 | |
275 | MatchResult Detect2Chars::doMatch(QStringView text, int offset, const QStringList &) const |
276 | { |
277 | if (text.size() - offset < 2) { |
278 | return offset; |
279 | } |
280 | if (text.at(offset) == m_char1 && text.at(offset + 1) == m_char2) { |
281 | return offset + 2; |
282 | } |
283 | return offset; |
284 | } |
285 | |
286 | MatchResult DetectIdentifier::doMatch(QStringView text, int offset, const QStringList &) const |
287 | { |
288 | if (!text.at(offset).isLetter() && text.at(offset) != QLatin1Char('_')) { |
289 | return offset; |
290 | } |
291 | |
292 | for (int i = offset + 1; i < text.size(); ++i) { |
293 | const auto c = text.at(i); |
294 | if (!c.isLetterOrNumber() && c != QLatin1Char('_')) { |
295 | return i; |
296 | } |
297 | } |
298 | |
299 | return text.size(); |
300 | } |
301 | |
302 | MatchResult DetectSpaces::doMatch(QStringView text, int offset, const QStringList &) const |
303 | { |
304 | while (offset < text.size() && text.at(offset).isSpace()) { |
305 | ++offset; |
306 | } |
307 | return offset; |
308 | } |
309 | |
/**
 * Copies the definition's word delimiters and applies the rule's own
 * additional/weak delimiters to that copy.
 */
Float::Float(DefinitionData &def, const HighlightingContextData::Rule::Float &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
315 | |
316 | MatchResult Float::doMatch(QStringView text, int offset, const QStringList &) const |
317 | { |
318 | if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) { |
319 | return offset; |
320 | } |
321 | |
322 | auto newOffset = offset; |
323 | while (newOffset < text.size() && isDigit(text.at(newOffset))) { |
324 | ++newOffset; |
325 | } |
326 | |
327 | if (newOffset >= text.size() || text.at(newOffset) != QLatin1Char('.')) { |
328 | return offset; |
329 | } |
330 | ++newOffset; |
331 | |
332 | while (newOffset < text.size() && isDigit(text.at(newOffset))) { |
333 | ++newOffset; |
334 | } |
335 | |
336 | if (newOffset == offset + 1) { // we only found a decimal point |
337 | return offset; |
338 | } |
339 | |
340 | auto expOffset = newOffset; |
341 | if (expOffset >= text.size() || (text.at(expOffset) != QLatin1Char('e') && text.at(expOffset) != QLatin1Char('E'))) { |
342 | return newOffset; |
343 | } |
344 | ++expOffset; |
345 | |
346 | if (expOffset < text.size() && (text.at(expOffset) == QLatin1Char('+') || text.at(expOffset) == QLatin1Char('-'))) { |
347 | ++expOffset; |
348 | } |
349 | bool foundExpDigit = false; |
350 | while (expOffset < text.size() && isDigit(text.at(expOffset))) { |
351 | ++expOffset; |
352 | foundExpDigit = true; |
353 | } |
354 | |
355 | if (!foundExpDigit) { |
356 | return newOffset; |
357 | } |
358 | return expOffset; |
359 | } |
360 | |
361 | MatchResult HlCChar::doMatch(QStringView text, int offset, const QStringList &) const |
362 | { |
363 | if (text.size() < offset + 3) { |
364 | return offset; |
365 | } |
366 | |
367 | if (text.at(offset) != QLatin1Char('\'') || text.at(offset + 1) == QLatin1Char('\'')) { |
368 | return offset; |
369 | } |
370 | |
371 | auto newOffset = matchEscapedChar(text, offset + 1); |
372 | if (newOffset == offset + 1) { |
373 | if (text.at(newOffset) == QLatin1Char('\\')) { |
374 | return offset; |
375 | } else { |
376 | ++newOffset; |
377 | } |
378 | } |
379 | if (newOffset >= text.size()) { |
380 | return offset; |
381 | } |
382 | |
383 | if (text.at(newOffset) == QLatin1Char('\'')) { |
384 | return newOffset + 1; |
385 | } |
386 | |
387 | return offset; |
388 | } |
389 | |
/**
 * Copies the definition's word delimiters and applies the rule's own
 * additional/weak delimiters to that copy.
 */
HlCHex::HlCHex(DefinitionData &def, const HighlightingContextData::Rule::HlCHex &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
395 | |
396 | MatchResult HlCHex::doMatch(QStringView text, int offset, const QStringList &) const |
397 | { |
398 | if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) { |
399 | return offset; |
400 | } |
401 | |
402 | if (text.size() < offset + 3) { |
403 | return offset; |
404 | } |
405 | |
406 | if (text.at(offset) != QLatin1Char('0') || (text.at(offset + 1) != QLatin1Char('x') && text.at(offset + 1) != QLatin1Char('X'))) { |
407 | return offset; |
408 | } |
409 | |
410 | if (!isHexChar(text.at(offset + 2))) { |
411 | return offset; |
412 | } |
413 | |
414 | offset += 3; |
415 | while (offset < text.size() && isHexChar(text.at(offset))) { |
416 | ++offset; |
417 | } |
418 | |
419 | // TODO Kate matches U/L suffix, QtC does not? |
420 | |
421 | return offset; |
422 | } |
423 | |
/**
 * Copies the definition's word delimiters and applies the rule's own
 * additional/weak delimiters to that copy.
 */
HlCOct::HlCOct(DefinitionData &def, const HighlightingContextData::Rule::HlCOct &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
429 | |
430 | MatchResult HlCOct::doMatch(QStringView text, int offset, const QStringList &) const |
431 | { |
432 | if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) { |
433 | return offset; |
434 | } |
435 | |
436 | if (text.size() < offset + 2) { |
437 | return offset; |
438 | } |
439 | |
440 | if (text.at(offset) != QLatin1Char('0')) { |
441 | return offset; |
442 | } |
443 | |
444 | if (!isOctalChar(text.at(offset + 1))) { |
445 | return offset; |
446 | } |
447 | |
448 | offset += 2; |
449 | while (offset < text.size() && isOctalChar(text.at(offset))) { |
450 | ++offset; |
451 | } |
452 | |
453 | return offset; |
454 | } |
455 | |
456 | MatchResult HlCStringChar::doMatch(QStringView text, int offset, const QStringList &) const |
457 | { |
458 | return matchEscapedChar(text, offset); |
459 | } |
460 | |
/**
 * Stores the name of the context to include and the includeAttribute flag
 * from the parsed rule data.
 */
IncludeRules::IncludeRules(const HighlightingContextData::Rule::IncludeRules &data)
    : m_contextName(data.contextName)
    , m_includeAttribute(data.includeAttribute)
{
}
466 | |
467 | MatchResult IncludeRules::doMatch(QStringView text, int offset, const QStringList &) const |
468 | { |
469 | Q_UNUSED(text); |
470 | qCWarning(Log) << "Unresolved include rule" ; |
471 | return offset; |
472 | } |
473 | |
/**
 * Copies the definition's word delimiters and applies the rule's own
 * additional/weak delimiters to that copy.
 */
Int::Int(DefinitionData &def, const HighlightingContextData::Rule::Int &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
479 | |
480 | MatchResult Int::doMatch(QStringView text, int offset, const QStringList &) const |
481 | { |
482 | if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) { |
483 | return offset; |
484 | } |
485 | |
486 | while (offset < text.size() && isDigit(text.at(offset))) { |
487 | ++offset; |
488 | } |
489 | return offset; |
490 | } |
491 | |
492 | Rule::Ptr KeywordListRule::create(DefinitionData &def, const HighlightingContextData::Rule::Keyword &data, QStringView lookupContextName) |
493 | { |
494 | /** |
495 | * get our keyword list, if not found => bail out |
496 | */ |
497 | auto *keywordList = def.keywordList(name: data.name); |
498 | if (!keywordList) { |
499 | qCWarning(Log) << "Rule: Unknown keyword list" << data.name << "in context" << lookupContextName << "of definition" << def.name; |
500 | return Rule::Ptr(); |
501 | } |
502 | |
503 | if (keywordList->isEmpty()) { |
504 | return Rule::Ptr(); |
505 | } |
506 | |
507 | /** |
508 | * we might overwrite the case sensitivity |
509 | * then we need to init the list for lookup of that sensitivity setting |
510 | */ |
511 | if (data.hasCaseSensitivityOverride) { |
512 | keywordList->initLookupForCaseSensitivity(data.caseSensitivityOverride); |
513 | } |
514 | |
515 | return std::make_shared<KeywordListRule>(*keywordList, def, data); |
516 | } |
517 | |
/**
 * Binds the rule to @p keywordList and picks the case sensitivity: the rule's
 * override when present, the list's own setting otherwise.
 * Also copies the definition's word delimiters and applies the rule's own
 * additional/weak delimiters to that copy.
 */
KeywordListRule::KeywordListRule(const KeywordList &keywordList, DefinitionData &def, const HighlightingContextData::Rule::Keyword &data)
    : m_wordDelimiters(def.wordDelimiters)
    , m_keywordList(keywordList)
    , m_caseSensitivity(data.hasCaseSensitivityOverride ? data.caseSensitivityOverride : keywordList.caseSensitivity())
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
    // doMatch() returns a skip offset together with a failed match
    m_hasSkipOffset = true;
}
526 | |
527 | MatchResult KeywordListRule::doMatch(QStringView text, int offset, const QStringList &) const |
528 | { |
529 | auto newOffset = offset; |
530 | while (text.size() > newOffset && !m_wordDelimiters.contains(text.at(newOffset))) { |
531 | ++newOffset; |
532 | } |
533 | if (newOffset == offset) { |
534 | return offset; |
535 | } |
536 | |
537 | if (m_keywordList.contains(text.mid(offset, newOffset - offset), m_caseSensitivity)) { |
538 | return newOffset; |
539 | } |
540 | |
541 | // we don't match, but we can skip until newOffset as we can't start a keyword in-between |
542 | return MatchResult(offset, newOffset); |
543 | } |
544 | |
/**
 * Stores the line continuation character from the parsed rule data.
 */
LineContinue::LineContinue(const HighlightingContextData::Rule::LineContinue &data)
    : m_char(data.char1)
{
}
549 | |
550 | MatchResult LineContinue::doMatch(QStringView text, int offset, const QStringList &) const |
551 | { |
552 | if (offset == text.size() - 1 && text.at(offset) == m_char) { |
553 | return offset + 1; |
554 | } |
555 | return offset; |
556 | } |
557 | |
/**
 * Stores the characters that open and close the range.
 */
RangeDetect::RangeDetect(const HighlightingContextData::Rule::RangeDetect &data)
    : m_begin(data.begin)
    , m_end(data.end)
{
}
563 | |
564 | MatchResult RangeDetect::doMatch(QStringView text, int offset, const QStringList &) const |
565 | { |
566 | if (text.size() - offset < 2) { |
567 | return offset; |
568 | } |
569 | if (text.at(offset) != m_begin) { |
570 | return offset; |
571 | } |
572 | |
573 | auto newOffset = offset + 1; |
574 | while (newOffset < text.size()) { |
575 | if (text.at(newOffset) == m_end) { |
576 | return newOffset + 1; |
577 | } |
578 | ++newOffset; |
579 | } |
580 | return offset; |
581 | } |
582 | |
583 | static QRegularExpression::PatternOptions makePattenOptions(const HighlightingContextData::Rule::RegExpr &data) |
584 | { |
585 | return (data.isMinimal ? QRegularExpression::InvertedGreedinessOption : QRegularExpression::NoPatternOption) |
586 | | (data.caseSensitivity == Qt::CaseInsensitive ? QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption) |
587 | // DontCaptureOption is removed by resolve() when necessary |
588 | | QRegularExpression::DontCaptureOption |
589 | // ensure Unicode support is enabled |
590 | | QRegularExpression::UseUnicodePropertiesOption; |
591 | } |
592 | |
593 | static void resolveRegex(QRegularExpression ®exp, Context *context) |
594 | { |
595 | bool enableCapture = context && context->hasDynamicRule(); |
596 | |
597 | // disable DontCaptureOption when reference a context with dynamic rule or |
598 | // with invalid regex because DontCaptureOption with back reference capture is an error |
599 | if (enableCapture || !regexp.isValid()) { |
600 | regexp.setPatternOptions(regexp.patternOptions() & ~QRegularExpression::DontCaptureOption); |
601 | } |
602 | |
603 | if (!regexp.isValid()) { |
604 | qCDebug(Log) << "Invalid regexp:" << regexp.pattern(); |
605 | } |
606 | } |
607 | |
608 | static MatchResult regexMatch(const QRegularExpression ®exp, QStringView text, int offset) |
609 | { |
610 | /** |
611 | * match the pattern |
612 | */ |
613 | const auto result = regexp.matchView(text, offset, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption); |
614 | if (result.capturedStart() == offset) { |
615 | /** |
616 | * we only need to compute the captured texts if we have real capture groups |
617 | * highlightings should only address %1..%.., see e.g. replaceCaptures |
618 | * DetectChar ignores %0, too |
619 | */ |
620 | int lastCapturedIndex = result.lastCapturedIndex(); |
621 | if (lastCapturedIndex > 0) { |
622 | QStringList captures; |
623 | captures.reserve(lastCapturedIndex); |
624 | // ignore the capturing group number 0 |
625 | for (int i = 1; i <= lastCapturedIndex; ++i) |
626 | captures.push_back(result.captured(i)); |
627 | return MatchResult(offset + result.capturedLength(), std::move(captures)); |
628 | } |
629 | |
630 | /** |
631 | * else: ignore the implicit 0 group we always capture, no need to allocate stuff for that |
632 | */ |
633 | return MatchResult(offset + result.capturedLength()); |
634 | } |
635 | |
636 | /** |
637 | * no match |
638 | * we can always compute the skip offset as the highlighter will invalidate the cache for changed captures for dynamic rules! |
639 | */ |
640 | return MatchResult(offset, result.capturedStart()); |
641 | } |
642 | |
/**
 * Builds the regular expression from the rule's pattern and options.
 */
RegExpr::RegExpr(const HighlightingContextData::Rule::RegExpr &data)
    : m_regexp(data.pattern, makePattenOptions(data))
{
    // doMatch() returns a skip offset together with a failed match
    m_hasSkipOffset = true;
}
648 | |
649 | void RegExpr::resolve() |
650 | { |
651 | m_isResolved = true; |
652 | |
653 | resolveRegex(m_regexp, context().context()); |
654 | } |
655 | |
656 | MatchResult RegExpr::doMatch(QStringView text, int offset, const QStringList &) const |
657 | { |
658 | if (Q_UNLIKELY(!m_isResolved)) { |
659 | const_cast<RegExpr *>(this)->resolve(); |
660 | } |
661 | |
662 | return regexMatch(m_regexp, text, offset); |
663 | } |
664 | |
/**
 * Stores the pattern and its options; doMatch() builds the regular expression
 * on each call after substituting the captures.
 */
DynamicRegExpr::DynamicRegExpr(const HighlightingContextData::Rule::RegExpr &data)
    : m_pattern(data.pattern)
    , m_patternOptions(makePattenOptions(data))
{
    m_dynamic = true;
    // doMatch() returns a skip offset together with a failed match
    m_hasSkipOffset = true;
}
672 | |
673 | void DynamicRegExpr::resolve() |
674 | { |
675 | m_isResolved = true; |
676 | |
677 | QRegularExpression regexp(m_pattern, m_patternOptions); |
678 | resolveRegex(regexp, context().context()); |
679 | m_patternOptions = regexp.patternOptions(); |
680 | } |
681 | |
682 | MatchResult DynamicRegExpr::doMatch(QStringView text, int offset, const QStringList &captures) const |
683 | { |
684 | if (Q_UNLIKELY(!m_isResolved)) { |
685 | const_cast<DynamicRegExpr *>(this)->resolve(); |
686 | } |
687 | |
688 | /** |
689 | * create new pattern with right instantiation |
690 | */ |
691 | const QRegularExpression regexp(replaceCaptures(m_pattern, captures, true), m_patternOptions); |
692 | |
693 | return regexMatch(regexp, text, offset); |
694 | } |
695 | |
/**
 * Stores the string to detect and the case sensitivity used for the comparison.
 */
StringDetect::StringDetect(const HighlightingContextData::Rule::StringDetect &data)
    : m_string(data.string)
    , m_caseSensitivity(data.caseSensitivity)
{
}
701 | |
702 | MatchResult StringDetect::doMatch(QStringView text, int offset, const QStringList &) const |
703 | { |
704 | return matchString(m_string, text, offset, m_caseSensitivity); |
705 | } |
706 | |
/**
 * Stores the string template (containing %N placeholders that doMatch()
 * substitutes) and the case sensitivity, and marks the rule as dynamic.
 */
DynamicStringDetect::DynamicStringDetect(const HighlightingContextData::Rule::StringDetect &data)
    : m_string(data.string)
    , m_caseSensitivity(data.caseSensitivity)
{
    m_dynamic = true;
}
713 | |
714 | MatchResult DynamicStringDetect::doMatch(QStringView text, int offset, const QStringList &captures) const |
715 | { |
716 | /** |
717 | * for dynamic case: create new pattern with right instantiation |
718 | */ |
719 | const auto pattern = replaceCaptures(m_string, captures, false); |
720 | return matchString(pattern, text, offset, m_caseSensitivity); |
721 | } |
722 | |
/**
 * Stores the word to detect and its case sensitivity. Also copies the
 * definition's word delimiters and applies the rule's own additional/weak
 * delimiters to that copy.
 */
WordDetect::WordDetect(DefinitionData &def, const HighlightingContextData::Rule::WordDetect &data)
    : m_wordDelimiters(def.wordDelimiters)
    , m_word(data.word)
    , m_caseSensitivity(data.caseSensitivity)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
730 | |
731 | MatchResult WordDetect::doMatch(QStringView text, int offset, const QStringList &) const |
732 | { |
733 | if (text.size() - offset < m_word.size()) { |
734 | return offset; |
735 | } |
736 | |
737 | /** |
738 | * detect delimiter characters on the inner and outer boundaries of the string |
739 | * NOTE: m_word isn't empty |
740 | */ |
741 | if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1)) && !m_wordDelimiters.contains(text.at(offset))) { |
742 | return offset; |
743 | } |
744 | |
745 | if (text.mid(offset, m_word.size()).compare(m_word, m_caseSensitivity) != 0) { |
746 | return offset; |
747 | } |
748 | |
749 | if (text.size() == offset + m_word.size() || m_wordDelimiters.contains(text.at(offset + m_word.size())) |
750 | || m_wordDelimiters.contains(text.at(offset + m_word.size() - 1))) { |
751 | return offset + m_word.size(); |
752 | } |
753 | |
754 | return offset; |
755 | } |
756 | |