1/*
2 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
3 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
4
5 SPDX-License-Identifier: MIT
6*/
7
8#include <QBuffer>
9#include <QCborValue>
10#include <QCoreApplication>
11#include <QDebug>
12#include <QFile>
13#include <QFileInfo>
14#include <QMutableMapIterator>
15#include <QRegularExpression>
16#include <QScopeGuard>
17#include <QString>
18#include <QVariant>
19#include <QXmlStreamReader>
20
21#ifdef HAS_XERCESC
22
23#include <xercesc/framework/MemBufInputSource.hpp>
24#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
25
26#include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
27
28#include <xercesc/sax/ErrorHandler.hpp>
29#include <xercesc/sax/SAXParseException.hpp>
30
31#include <xercesc/util/PlatformUtils.hpp>
32#include <xercesc/util/XMLString.hpp>
33#include <xercesc/util/XMLUni.hpp>
34
35#include <xercesc/framework/XMLGrammarPoolImpl.hpp>
36#include <xercesc/validators/common/Grammar.hpp>
37
38using namespace xercesc;
39
40/*
41 * Ideas taken from:
42 *
43 * author : Boris Kolpackov <boris@codesynthesis.com>
44 * copyright : not copyrighted - public domain
45 *
46 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
47 * and then to validate a set of XML documents against these schemas. To
48 * build this program you will need Xerces-C++ 3.0.0 or later. For more
49 * information, see:
50 *
51 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
52 */
53
54/**
55 * Error handler object used during xml schema validation.
56 */
57class CustomErrorHandler : public ErrorHandler
58{
59public:
60 /**
61 * Constructor
62 * @param messages Pointer to the error message string to fill.
63 */
64 CustomErrorHandler(QString *messages)
65 : m_messages(messages)
66 {
67 }
68
69 /**
70 * Check global success/fail state.
71 * @return True if there was a failure, false otherwise.
72 */
73 bool failed() const
74 {
75 return m_failed;
76 }
77
78private:
79 /**
80 * Severity classes for error messages.
81 */
82 enum severity { s_warning, s_error, s_fatal };
83
84 /**
85 * Wrapper for warning exceptions.
86 * @param e Exception to handle.
87 */
88 void warning(const SAXParseException &e) override
89 {
90 m_failed = true; // be strict, warnings are evil, too!
91 handle(e, s_warning);
92 }
93
94 /**
95 * Wrapper for error exceptions.
96 * @param e Exception to handle.
97 */
98 void error(const SAXParseException &e) override
99 {
100 m_failed = true;
101 handle(e, s_error);
102 }
103
104 /**
105 * Wrapper for fatal error exceptions.
106 * @param e Exception to handle.
107 */
108 void fatalError(const SAXParseException &e) override
109 {
110 m_failed = true;
111 handle(e, s_fatal);
112 }
113
114 /**
115 * Reset the error status to "no error".
116 */
117 void resetErrors() override
118 {
119 m_failed = false;
120 }
121
122 /**
123 * Generic handler for error/warning/fatal error message exceptions.
124 * @param e Exception to handle.
125 * @param s Enum value encoding the message severtity.
126 */
127 void handle(const SAXParseException &e, severity s)
128 {
129 // get id to print
130 const XMLCh *xid(e.getPublicId());
131 if (!xid)
132 xid = e.getSystemId();
133
134 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
135 << QString::fromUtf16(e.getMessage()) << Qt::endl;
136 }
137
138private:
139 /**
140 * Storage for created error messages in this handler.
141 */
142 QTextStream m_messages;
143
144 /**
145 * Global error state. True if there was an error, false otherwise.
146 */
147 bool m_failed = false;
148};
149
150class CustomXMLValidator : public SAX2XMLReaderImpl
151{
152public:
153 QString messages;
154 CustomErrorHandler eh{&messages};
155
156 CustomXMLValidator(XMLGrammarPool *xsd)
157 : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
158 {
159 // Commonly useful configuration.
160 //
161 setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
162 setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
163 setFeature(XMLUni::fgSAX2CoreValidation, true);
164
165 // Enable validation.
166 //
167 setFeature(XMLUni::fgXercesSchema, true);
168 setFeature(XMLUni::fgXercesSchemaFullChecking, true);
169 setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
170
171 // Use the loaded grammar during parsing.
172 //
173 setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
174
175 // Don't load schemas from any other source (e.g., from XML document's
176 // xsi:schemaLocation attributes).
177 //
178 setFeature(XMLUni::fgXercesLoadSchema, false);
179
180 // Xerces-C++ 3.1.0 is the first version with working multi import
181 // support.
182 //
183 setFeature(XMLUni::fgXercesHandleMultipleImports, true);
184
185 setErrorHandler(&eh);
186 }
187};
188
189#endif
190
191#include "../lib/worddelimiters_p.h"
192#include "../lib/xml_p.h"
193
194#include <array>
195
196using KSyntaxHighlighting::WordDelimiters;
197using KSyntaxHighlighting::Xml::attrToBool;
198
199using namespace Qt::Literals::StringLiterals;
200
201#if QT_VERSION < QT_VERSION_CHECK(6, 10, 0)
202static constexpr QStringView operator""_sv(const char16_t *s, std::size_t n)
203{
204 return QStringView(s, s + n);
205}
206#endif
207
208namespace
209{
210
//! A "major.minor" version pair, ordered first by major then by minor revision.
struct KateVersion {
    uint majorRevision;
    uint minorRevision;

    //! Both revisions default to 0.
    KateVersion(uint majorRev = 0, uint minorRev = 0)
        : majorRevision(majorRev)
        , minorRevision(minorRev)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const KateVersion &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
226
227class HlFilesChecker
228{
229public:
230 void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames, bool generated)
231 {
232 m_currentDefinition = &*m_definitions.insert(key: name, value: Definition{});
233 m_currentDefinition->languageName = name;
234 m_currentDefinition->filename = filename;
235 m_currentDefinition->kateVersionStr = verStr.toString();
236 m_currentDefinition->generated = generated;
237 m_currentKeywords = nullptr;
238 m_currentContext = nullptr;
239
240 const auto idx = verStr.indexOf(c: u'.');
241 bool okVersion = idx > 0;
242 if (okVersion) {
243 m_currentDefinition->kateVersion = {
244 verStr.sliced(pos: 0, n: idx).toUInt(ok: &okVersion),
245 verStr.sliced(pos: idx + 1).toUInt(ok: &okVersion),
246 };
247 }
248 if (!okVersion) {
249 qWarning() << filename << "invalid kateversion. The expected format is 'major.minor' (e.g. \"5.79\")." << verStr;
250 m_success = false;
251 // continue with a recent version to avoid warning such as "available from XXX"
252 m_currentDefinition->kateVersion = {9999999, 0};
253 }
254
255 auto checkName = [this, &filename](char const *nameType, const QString &name) {
256 auto it = m_names.find(key: name);
257 if (it != m_names.end()) {
258 qWarning() << filename << "duplicate" << nameType << "with" << it.value();
259 m_success = false;
260 } else {
261 m_names.insert(key: name, value: filename);
262 }
263 };
264 checkName("name", name);
265 for (const auto &alternativeName : alternativeNames) {
266 checkName("alternative name", alternativeName);
267 }
268 }
269
270 KateVersion currentVersion() const
271 {
272 return m_currentDefinition->kateVersion;
273 }
274
275 void processElement(const QXmlStreamReader &xml)
276 {
277 switch (xml.tokenType()) {
278 case QXmlStreamReader::StartElement:
279 if (m_currentContext) {
280 m_currentContext->rules.push_back(t: Context::Rule{});
281 auto &rule = m_currentContext->rules.back();
282 m_success = rule.parseElement(filename: m_currentDefinition->filename, xml) && m_success;
283 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
284 } else if (m_currentKeywords) {
285 m_inKeywordItem = true;
286 } else if (xml.name() == u"context"_sv) {
287 processContextElement(xml);
288 } else if (xml.name() == u"list"_sv) {
289 processListElement(xml);
290 } else if (xml.name() == u"keywords"_sv) {
291 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
292 } else if (xml.name() == u"emptyLine"_sv) {
293 m_success = parseEmptyLine(filename: m_currentDefinition->filename, xml) && m_success;
294 } else if (xml.name() == u"itemData"_sv) {
295 m_success = m_currentDefinition->itemDatas.parseElement(filename: m_currentDefinition->filename, xml) && m_success;
296 }
297 break;
298
299 case QXmlStreamReader::EndElement:
300 if (m_currentContext && xml.name() == u"context"_sv) {
301 m_currentContext = nullptr;
302 } else if (m_currentKeywords && xml.name() == u"list"_sv) {
303 m_currentKeywords = nullptr;
304 } else if (m_currentKeywords) {
305 m_success = m_currentKeywords->items.parseElement(filename: m_currentDefinition->filename, xml, content: m_textContent) && m_success;
306 m_textContent.clear();
307 m_inKeywordItem = false;
308 }
309 break;
310
311 case QXmlStreamReader::EntityReference:
312 case QXmlStreamReader::Characters:
313 if (m_inKeywordItem) {
314 m_textContent += xml.text();
315 }
316 break;
317
318 default:;
319 }
320 }
321
322 //! Resolve context attribute and include tag
323 void resolveContexts()
324 {
325 QMutableMapIterator<QString, Definition> def(m_definitions);
326 while (def.hasNext()) {
327 def.next();
328 auto &definition = def.value();
329 auto &contexts = definition.contexts;
330
331 if (contexts.isEmpty()) {
332 qWarning() << definition.filename << "has no context";
333 m_success = false;
334 continue;
335 }
336
337 auto markAsUsedContext = [](ContextName &contextName) {
338 for (auto *ctx : contextName.contexts) {
339 ctx->isOnlyIncluded = false;
340 }
341 };
342
343 QMutableMapIterator<QString, Context> contextIt(contexts);
344 while (contextIt.hasNext()) {
345 contextIt.next();
346 auto &context = contextIt.value();
347 resolveContextName(definition, contextName&: context.lineEndContext, line: context.line, attrName: "lineEndContext");
348 resolveContextName(definition, contextName&: context.lineEmptyContext, line: context.line, attrName: "lineEmptyContext");
349 resolveContextName(definition, contextName&: context.fallthroughContext, line: context.line, attrName: "fallthroughContext");
350 markAsUsedContext(context.lineEndContext);
351 markAsUsedContext(context.lineEmptyContext);
352 markAsUsedContext(context.fallthroughContext);
353 for (auto &rule : context.rules) {
354 rule.parentContext = &context;
355 if (rule.type != Context::Rule::Type::IncludeRules) {
356 resolveContextName(definition, contextName&: rule.context, line: rule.line, attrName: "context");
357 markAsUsedContext(rule.context);
358 } else {
359 auto *ctx = resolveContextPartName(definition, contextNamePart: rule.context.name, line: rule.line, attrName: "context", originalContext: rule.context.name);
360 if (ctx) {
361 rule.context.contexts.push_back(t: ctx);
362 if (rule.includeAttrib == XmlBool::True) {
363 ctx->referencedWithIncludeAttrib = true;
364 }
365 }
366 }
367 }
368 }
369
370 auto *firstContext = &*definition.contexts.find(key: definition.firstContextName);
371 firstContext->isOnlyIncluded = false;
372 definition.firstContext = firstContext;
373 }
374
375 resolveIncludeRules();
376 }
377
378 bool check() const
379 {
380 bool success = m_success;
381
382 const auto usedContexts = extractUsedContexts();
383
384 QMap<const Definition *, const Definition *> maxVersionByDefinitions;
385 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
386
387 QMapIterator<QString, Definition> def(m_definitions);
388 while (def.hasNext()) {
389 def.next();
390 const auto &definition = def.value();
391 const auto &filename = definition.filename;
392
393 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
394 if (maxDef != &definition) {
395 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
396 << ". Please, increase kateversion.";
397 success = false;
398 }
399
400 QSet<ItemDatas::Style> usedAttributeNames;
401 QSet<ItemDatas::Style> ignoredAttributeNames;
402 success = checkKeywordsList(definition) && success;
403 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
404
405 // search for non-existing itemDatas.
406 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
407 for (const auto &styleName : invalidNames) {
408 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
409 success = false;
410 }
411
412 if (!definition.generated) {
413 // search for existing itemDatas, but unusable.
414 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
415 for (const auto &styleName : ignoredNames) {
416 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
417 << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
418 success = false;
419 }
420
421 // search for unused itemDatas.
422 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
423 unusedNames -= ignoredNames;
424 for (const auto &styleName : std::as_const(t&: unusedNames)) {
425 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
426 success = false;
427 }
428 }
429 }
430
431 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
432 while (unreachableIncludedRuleIt.hasNext()) {
433 unreachableIncludedRuleIt.next();
434 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
435 if (unreachableRulesBy.alwaysUnreachable) {
436 auto *rule = unreachableIncludedRuleIt.key();
437
438 if (!rule->parentContext->isOnlyIncluded) {
439 continue;
440 }
441
442 // remove duplicates rules
443 QSet<const Context::Rule *> rules;
444 auto &unreachableBy = unreachableRulesBy.unreachableBy;
445 unreachableBy.erase(abegin: std::remove_if(first: unreachableBy.begin(),
446 last: unreachableBy.end(),
447 pred: [&](const RuleAndInclude &ruleAndInclude) {
448 if (rules.contains(value: ruleAndInclude.rule)) {
449 return true;
450 }
451 rules.insert(value: ruleAndInclude.rule);
452 return false;
453 }),
454 aend: unreachableBy.end());
455
456 QString message;
457 message.reserve(asize: 128);
458 for (auto &ruleAndInclude : std::as_const(t&: unreachableBy)) {
459 message += u"line "_sv;
460 message += QString::number(ruleAndInclude.rule->line);
461 message += u" ["_sv;
462 message += ruleAndInclude.rule->parentContext->name;
463 if (rule->filename != ruleAndInclude.rule->filename) {
464 message += u" ("_sv;
465 message += ruleAndInclude.rule->filename;
466 message += u')';
467 }
468 if (ruleAndInclude.includeRules) {
469 message += u" via line "_sv;
470 message += QString::number(ruleAndInclude.includeRules->line);
471 }
472 message += u"], "_sv;
473 }
474 message.chop(n: 2);
475
476 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
477 success = false;
478 }
479 }
480
481 return success;
482 }
483
484private:
485 enum class XmlBool {
486 Unspecified,
487 False,
488 True,
489 };
490
491 struct Context;
492
493 using ContextList = QVarLengthArray<Context *, 2>;
494
495 struct ContextName {
496 QString name;
497 int popCount = 0;
498 bool stay = false;
499
500 ContextList contexts{};
501 };
502
503 struct Parser {
504 const QString &filename;
505 const QXmlStreamReader &xml;
506 const QXmlStreamAttribute &attr;
507 bool success;
508
509 //! Read a string type attribute, \c success = \c false when \p str is not empty
510 //! \return \c true when attr.name() == attrName, otherwise false
511 bool extractString(QString &str, QStringView attrName)
512 {
513 if (attr.name() != attrName) {
514 return false;
515 }
516
517 str = attr.value().toString();
518 if (str.isEmpty()) {
519 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
520 success = false;
521 }
522
523 return true;
524 }
525
526 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
527 //! \return \c true when attr.name() == attrName, otherwise false
528 bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
529 {
530 if (attr.name() != attrName) {
531 return false;
532 }
533
534 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(str: attr.value()) ? XmlBool::True : XmlBool::False;
535
536 return true;
537 }
538
539 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
540 //! \return \c true when attr.name() == attrName, otherwise false
541 bool extractPositive(int &positive, QStringView attrName)
542 {
543 if (attr.name() != attrName) {
544 return false;
545 }
546
547 bool ok = true;
548 positive = attr.value().toInt(ok: &ok);
549
550 if (!ok || positive < 0) {
551 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
552 success = false;
553 }
554
555 return true;
556 }
557
558 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
559 //! \return \c true when attr.name() == attrName, otherwise false
560 bool checkColor(QStringView attrName)
561 {
562 if (attr.name() != attrName) {
563 return false;
564 }
565
566 const auto value = attr.value();
567 if (value.isEmpty() /*|| QColor(value).isValid()*/) {
568 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
569 success = false;
570 }
571
572 return true;
573 }
574
575 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
576 //! \return \c true when attr.name() == attrName, otherwise false
577 bool extractChar(QChar &c, QStringView attrName)
578 {
579 if (attr.name() != attrName) {
580 return false;
581 }
582
583 if (attr.value().size() == 1) {
584 c = attr.value()[0];
585 } else {
586 c = u'_';
587 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
588 success = false;
589 }
590
591 return true;
592 }
593
594 //! \return parsing status when \p isExtracted is \c true, otherwise \c false
595 bool checkIfExtracted(bool isExtracted)
596 {
597 if (isExtracted) {
598 return success;
599 }
600
601 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
602 return false;
603 }
604 };
605
606 struct Keywords {
607 struct Items {
608 struct Item {
609 QString content;
610 int line;
611
612 friend size_t qHash(const Item &item, size_t seed = 0)
613 {
614 return qHash(key: item.content, seed);
615 }
616
617 friend bool operator==(const Item &item0, const Item &item1)
618 {
619 return item0.content == item1.content;
620 }
621 };
622
623 QList<Item> keywords;
624 QSet<Item> includes;
625
626 bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content)
627 {
628 bool success = true;
629
630 const int line = xml.lineNumber();
631
632 if (content.isEmpty()) {
633 qWarning() << filename << "line" << line << "is empty:" << xml.name();
634 success = false;
635 }
636
637 if (xml.name() == u"include"_sv) {
638 includes.insert(value: {.content: content, .line: line});
639 } else if (xml.name() == u"item"_sv) {
640 keywords.append(t: {.content: content, .line: line});
641 } else {
642 qWarning() << filename << "line" << line << "invalid element:" << xml.name();
643 success = false;
644 }
645
646 return success;
647 }
648 };
649
650 QString name;
651 Items items;
652 int line;
653
654 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
655 {
656 line = xml.lineNumber();
657
658 bool success = true;
659 const auto attrs = xml.attributes();
660 for (const auto &attr : attrs) {
661 Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success};
662
663 const bool isExtracted = parser.extractString(str&: name, attrName: u"name"_sv);
664
665 success = parser.checkIfExtracted(isExtracted);
666 }
667 return success;
668 }
669 };
670
671 struct Context {
672 struct Rule {
673 enum class Type {
674 Unknown,
675 AnyChar,
676 Detect2Chars,
677 DetectChar,
678 DetectIdentifier,
679 DetectSpaces,
680 Float,
681 HlCChar,
682 HlCHex,
683 HlCOct,
684 HlCStringChar,
685 IncludeRules,
686 Int,
687 LineContinue,
688 RangeDetect,
689 RegExpr,
690 StringDetect,
691 WordDetect,
692 keyword,
693 };
694
695 Type type{};
696
697 bool isDotRegex = false;
698 int line = -1;
699
700 // commonAttributes
701 QString attribute;
702 ContextName context;
703 QString beginRegion;
704 QString endRegion;
705 int column = -1;
706 XmlBool lookAhead{};
707 XmlBool firstNonSpace{};
708
709 // StringDetect, WordDetect, keyword
710 XmlBool insensitive{};
711
712 // DetectChar, StringDetect, RegExpr, keyword
713 XmlBool dynamic{};
714
715 // Regex
716 XmlBool minimal{};
717
718 // IncludeRule
719 XmlBool includeAttrib{};
720
721 // DetectChar, Detect2Chars, LineContinue, RangeDetect
722 QChar char0;
723 // Detect2Chars, RangeDetect
724 QChar char1;
725
726 // AnyChar, StringDetect, RegExpr, WordDetect, keyword
727 QString string;
728 // RegExpr without .* as suffix
729 QString sanitizedString;
730
731 // Float, HlCHex, HlCOct, Int, WordDetect, keyword
732 QString additionalDeliminator;
733 QString weakDeliminator;
734
735 // rules included by IncludeRules (without IncludeRule)
736 QList<const Rule *> includedRules;
737
738 // IncludeRules included by IncludeRules
739 QSet<const Rule *> includedIncludeRules;
740
741 Context const *parentContext = nullptr;
742
743 QString filename;
744
745 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
746 {
747 this->filename = filename;
748 line = xml.lineNumber();
749
750 using Pair = QPair<QStringView, Type>;
751 static const auto pairs = {
752 Pair{u"AnyChar"_sv, Type::AnyChar},
753 Pair{u"Detect2Chars"_sv, Type::Detect2Chars},
754 Pair{u"DetectChar"_sv, Type::DetectChar},
755 Pair{u"DetectIdentifier"_sv, Type::DetectIdentifier},
756 Pair{u"DetectSpaces"_sv, Type::DetectSpaces},
757 Pair{u"Float"_sv, Type::Float},
758 Pair{u"HlCChar"_sv, Type::HlCChar},
759 Pair{u"HlCHex"_sv, Type::HlCHex},
760 Pair{u"HlCOct"_sv, Type::HlCOct},
761 Pair{u"HlCStringChar"_sv, Type::HlCStringChar},
762 Pair{u"IncludeRules"_sv, Type::IncludeRules},
763 Pair{u"Int"_sv, Type::Int},
764 Pair{u"LineContinue"_sv, Type::LineContinue},
765 Pair{u"RangeDetect"_sv, Type::RangeDetect},
766 Pair{u"RegExpr"_sv, Type::RegExpr},
767 Pair{u"StringDetect"_sv, Type::StringDetect},
768 Pair{u"WordDetect"_sv, Type::WordDetect},
769 Pair{u"keyword", Type::keyword},
770 };
771
772 for (auto pair : pairs) {
773 if (xml.name() == pair.first) {
774 type = pair.second;
775 bool success = parseAttributes(filename, xml);
776 success = checkMandoryAttributes(filename, xml) && success;
777 if (success && type == Type::RegExpr) {
778 // ., (.) followed by *, +, {1} or nothing
779 static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
780 // remove "(?:" and ")"
781 static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))"));
782 // remove parentheses on a copy of string
783 auto reg = QString(string).replace(re: removeParentheses, after: QString());
784 isDotRegex = reg.contains(re: isDot);
785
786 // Remove .* and .*$ suffix.
787 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
788 sanitizedString = string;
789 sanitizedString.replace(re: allSuffix, after: QString());
790 // string is a catch-all, do not sanitize
791 if (sanitizedString.isEmpty() || sanitizedString == u"^"_sv) {
792 sanitizedString = string;
793 }
794 }
795 return success;
796 }
797 }
798
799 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
800 return false;
801 }
802
803 private:
804 bool parseAttributes(const QString &filename, const QXmlStreamReader &xml)
805 {
806 bool success = true;
807
808 const auto attrs = xml.attributes();
809 for (const auto &attr : attrs) {
810 Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success};
811
812 // clang-format off
813 const bool isExtracted
814 = parser.extractString(str&: attribute, attrName: u"attribute"_sv)
815 || parser.extractString(str&: context.name, attrName: u"context"_sv)
816 || parser.extractXmlBool(xmlBool&: lookAhead, attrName: u"lookAhead"_sv)
817 || parser.extractXmlBool(xmlBool&: firstNonSpace, attrName: u"firstNonSpace"_sv)
818 || parser.extractString(str&: beginRegion, attrName: u"beginRegion"_sv)
819 || parser.extractString(str&: endRegion, attrName: u"endRegion"_sv)
820 || parser.extractPositive(positive&: column, attrName: u"column"_sv)
821 || ((type == Type::RegExpr
822 || type == Type::StringDetect
823 || type == Type::WordDetect
824 || type == Type::keyword
825 ) && parser.extractXmlBool(xmlBool&: insensitive, attrName: u"insensitive"_sv))
826 || ((type == Type::DetectChar
827 || type == Type::RegExpr
828 || type == Type::StringDetect
829 || type == Type::keyword
830 ) && parser.extractXmlBool(xmlBool&: dynamic, attrName: u"dynamic"_sv))
831 || ((type == Type::RegExpr)
832 && parser.extractXmlBool(xmlBool&: minimal, attrName: u"minimal"_sv))
833 || ((type == Type::DetectChar
834 || type == Type::Detect2Chars
835 || type == Type::LineContinue
836 || type == Type::RangeDetect
837 ) && parser.extractChar(c&: char0, attrName: u"char"_sv))
838 || ((type == Type::Detect2Chars
839 || type == Type::RangeDetect
840 ) && parser.extractChar(c&: char1, attrName: u"char1"_sv))
841 || ((type == Type::AnyChar
842 || type == Type::RegExpr
843 || type == Type::StringDetect
844 || type == Type::WordDetect
845 || type == Type::keyword
846 ) && parser.extractString(str&: string, attrName: u"String"_sv))
847 || ((type == Type::IncludeRules)
848 && parser.extractXmlBool(xmlBool&: includeAttrib, attrName: u"includeAttrib"_sv))
849 || ((type == Type::Float
850 || type == Type::HlCHex
851 || type == Type::HlCOct
852 || type == Type::Int
853 || type == Type::keyword
854 || type == Type::WordDetect
855 ) && (parser.extractString(str&: additionalDeliminator, attrName: u"additionalDeliminator"_sv)
856 || parser.extractString(str&: weakDeliminator, attrName: u"weakDeliminator"_sv)))
857 ;
858 // clang-format on
859
860 success = parser.checkIfExtracted(isExtracted);
861 }
862
863 if (type == Type::LineContinue && char0 == u'\0') {
864 char0 = u'\\';
865 }
866
867 return success;
868 }
869
870 bool checkMandoryAttributes(const QString &filename, const QXmlStreamReader &xml)
871 {
872 QString missingAttr;
873
874 switch (type) {
875 case Type::Unknown:
876 return false;
877
878 case Type::AnyChar:
879 case Type::RegExpr:
880 case Type::StringDetect:
881 case Type::WordDetect:
882 case Type::keyword:
883 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
884 break;
885
886 case Type::DetectChar:
887 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
888 break;
889
890 case Type::Detect2Chars:
891 case Type::RangeDetect:
892 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
893 : !char0.unicode() ? QStringLiteral("char")
894 : !char1.unicode() ? QStringLiteral("char1")
895 : QString();
896 break;
897
898 case Type::IncludeRules:
899 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
900 break;
901
902 case Type::DetectIdentifier:
903 case Type::DetectSpaces:
904 case Type::Float:
905 case Type::HlCChar:
906 case Type::HlCHex:
907 case Type::HlCOct:
908 case Type::HlCStringChar:
909 case Type::Int:
910 case Type::LineContinue:
911 break;
912 }
913
914 if (!missingAttr.isEmpty()) {
915 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
916 return false;
917 }
918
919 return true;
920 }
921 };
922
923 int line;
924 // becomes false when a context (except includeRule) refers to it
925 bool isOnlyIncluded = true;
926 // becomes true when an includedRule refers to it with includeAttrib=true
927 bool referencedWithIncludeAttrib = false;
928 bool hasDynamicRule = false;
929 QString name;
930 QString attribute;
931 ContextName lineEndContext;
932 ContextName lineEmptyContext;
933 ContextName fallthroughContext;
934 QList<Rule> rules;
935 XmlBool dynamic{};
936 XmlBool fallthrough{};
937 XmlBool stopEmptyLineContextSwitchLoop{};
938
939 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
940 {
941 line = xml.lineNumber();
942
943 bool success = true;
944
945 const auto attrs = xml.attributes();
946 for (const auto &attr : attrs) {
947 Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success};
948 XmlBool noIndentationBasedFolding{};
949
950 // clang-format off
951 const bool isExtracted = parser.extractString(str&: name, attrName: u"name"_sv)
952 || parser.extractString(str&: attribute, attrName: u"attribute"_sv)
953 || parser.extractString(str&: lineEndContext.name, attrName: u"lineEndContext"_sv)
954 || parser.extractString(str&: lineEmptyContext.name, attrName: u"lineEmptyContext"_sv)
955 || parser.extractString(str&: fallthroughContext.name, attrName: u"fallthroughContext"_sv)
956 || parser.extractXmlBool(xmlBool&: dynamic, attrName: u"dynamic"_sv)
957 || parser.extractXmlBool(xmlBool&: fallthrough, attrName: u"fallthrough"_sv)
958 || parser.extractXmlBool(xmlBool&: stopEmptyLineContextSwitchLoop, attrName: u"stopEmptyLineContextSwitchLoop"_sv)
959 || parser.extractXmlBool(xmlBool&: noIndentationBasedFolding, attrName: u"noIndentationBasedFolding"_sv);
960 // clang-format on
961
962 success = parser.checkIfExtracted(isExtracted);
963 }
964
965 if (name.isEmpty()) {
966 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
967 success = false;
968 } else if (name.contains(c: u'!')) {
969 qWarning() << filename << "line" << xml.lineNumber() << "the name contains '!', which is a character used to refer to multiple contexts";
970 success = false;
971 }
972
973 if (attribute.isEmpty()) {
974 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
975 success = false;
976 }
977
978 return success;
979 }
980 };
981
982 struct ItemDatas {
983 struct Style {
984 QString name;
985 int line;
986
987 friend size_t qHash(const Style &style, size_t seed = 0)
988 {
989 return qHash(key: style.name, seed);
990 }
991
992 friend bool operator==(const Style &style0, const Style &style1)
993 {
994 return style0.name == style1.name;
995 }
996 };
997
998 QSet<Style> styleNames;
999
1000 bool parseElement(const QString &filename, const QXmlStreamReader &xml)
1001 {
1002 bool success = true;
1003
1004 QString name;
1005 QString defStyleNum;
1006 XmlBool boolean;
1007
1008 const auto attrs = xml.attributes();
1009 for (const auto &attr : attrs) {
1010 Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success};
1011
1012 // clang-format off
1013 const bool isExtracted
1014 = parser.extractString(str&: name, attrName: u"name"_sv)
1015 || parser.extractString(str&: defStyleNum, attrName: u"defStyleNum"_sv)
1016 || parser.extractXmlBool(xmlBool&: boolean, attrName: u"bold"_sv)
1017 || parser.extractXmlBool(xmlBool&: boolean, attrName: u"italic"_sv)
1018 || parser.extractXmlBool(xmlBool&: boolean, attrName: u"underline"_sv)
1019 || parser.extractXmlBool(xmlBool&: boolean, attrName: u"strikeOut"_sv)
1020 || parser.extractXmlBool(xmlBool&: boolean, attrName: u"spellChecking"_sv)
1021 || parser.checkColor(attrName: u"color"_sv)
1022 || parser.checkColor(attrName: u"selColor"_sv)
1023 || parser.checkColor(attrName: u"backgroundColor"_sv)
1024 || parser.checkColor(attrName: u"selBackgroundColor"_sv);
1025 // clang-format on
1026
1027 success = parser.checkIfExtracted(isExtracted);
1028 }
1029
1030 if (!name.isEmpty()) {
1031 const auto len = styleNames.size();
1032 styleNames.insert(value: {.name: name, .line: int(xml.lineNumber())});
1033 if (len == styleNames.size()) {
1034 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
1035 success = false;
1036 }
1037 }
1038
1039 return success;
1040 }
1041 };
1042
1043 struct Definition {
1044 QMap<QString, Keywords> keywordsList;
1045 QMap<QString, Context> contexts;
1046 ItemDatas itemDatas;
1047 QString firstContextName;
1048 const Context *firstContext = nullptr;
1049 QString filename;
1050 WordDelimiters wordDelimiters;
1051 KateVersion kateVersion{};
1052 QString kateVersionStr;
1053 QString languageName;
1054 QSet<const Definition *> referencedDefinitions;
1055 bool generated; // unreachability criteria should not be enforced with generated grammars
1056
1057 // Parse <keywords ...>
1058 bool parseKeywords(const QXmlStreamReader &xml)
1059 {
1060 wordDelimiters.append(s: xml.attributes().value(qualifiedName: u"additionalDeliminator"_sv));
1061 wordDelimiters.remove(c: xml.attributes().value(qualifiedName: u"weakDeliminator"_sv));
1062 return true;
1063 }
1064 };
1065
1066 // Parse <context>
1067 void processContextElement(const QXmlStreamReader &xml)
1068 {
1069 Context context;
1070 m_success = context.parseElement(filename: m_currentDefinition->filename, xml) && m_success;
1071 if (m_currentDefinition->firstContextName.isEmpty()) {
1072 m_currentDefinition->firstContextName = context.name;
1073 }
1074 if (m_currentDefinition->contexts.contains(key: context.name)) {
1075 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
1076 m_success = false;
1077 }
1078 m_currentContext = &*m_currentDefinition->contexts.insert(key: context.name, value: context);
1079 }
1080
1081 // Parse <list name="...">
1082 void processListElement(const QXmlStreamReader &xml)
1083 {
1084 Keywords keywords;
1085 m_success = keywords.parseElement(filename: m_currentDefinition->filename, xml) && m_success;
1086 if (m_currentDefinition->keywordsList.contains(key: keywords.name)) {
1087 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
1088 m_success = false;
1089 }
1090 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(key: keywords.name, value: keywords);
1091 }
1092
1093 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1094 {
1095 auto it = maxVersionByDefinitions.find(key: &definition);
1096 if (it != maxVersionByDefinitions.end()) {
1097 return it.value();
1098 } else {
1099 auto it = maxVersionByDefinitions.insert(key: &definition, value: &definition);
1100 for (const auto &referencedDef : definition.referencedDefinitions) {
1101 auto *maxDef = maxKateVersionDefinition(definition: *referencedDef, maxVersionByDefinitions);
1102 if (it.value()->kateVersion < maxDef->kateVersion) {
1103 it.value() = maxDef;
1104 }
1105 }
1106 return it.value();
1107 }
1108 }
1109
1110 // Initialize the referenced rules (Rule::includedRules)
1111 void resolveIncludeRules()
1112 {
1113 QSet<const Context *> usedContexts;
1114 QList<const Context *> contexts;
1115
1116 QMutableMapIterator<QString, Definition> def(m_definitions);
1117 while (def.hasNext()) {
1118 def.next();
1119 auto &definition = def.value();
1120 QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1121 while (contextIt.hasNext()) {
1122 contextIt.next();
1123 auto &currentContext = contextIt.value();
1124 for (auto &rule : currentContext.rules) {
1125 if (rule.type != Context::Rule::Type::IncludeRules) {
1126 continue;
1127 }
1128
1129 if (rule.context.contexts.isEmpty()) {
1130 continue;
1131 }
1132
1133 // resolve includedRules and includedIncludeRules
1134
1135 usedContexts.clear();
1136 usedContexts.insert(value: rule.context.contexts.front());
1137 contexts.clear();
1138 contexts.append(t: rule.context.contexts.front());
1139
1140 for (int i = 0; i < contexts.size(); ++i) {
1141 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1142 for (const auto &includedRule : contexts[i]->rules) {
1143 if (includedRule.type != Context::Rule::Type::IncludeRules) {
1144 rule.includedRules.append(t: &includedRule);
1145 } else if (&rule == &includedRule) {
1146 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1147 m_success = false;
1148 } else {
1149 rule.includedIncludeRules.insert(value: &includedRule);
1150
1151 if (includedRule.includedRules.isEmpty()) {
1152 for (const auto *context : includedRule.context.contexts) {
1153 if (!usedContexts.contains(value: context)) {
1154 contexts.append(t: context);
1155 usedContexts.insert(value: context);
1156 }
1157 }
1158 } else {
1159 rule.includedRules.append(l: includedRule.includedRules);
1160 }
1161 }
1162 }
1163 }
1164 }
1165 }
1166 }
1167 }
1168
1169 //! Recursively extracts the contexts used from the first context of the definitions.
1170 //! This method detects groups of contexts which are only used among themselves.
1171 QSet<const Context *> extractUsedContexts() const
1172 {
1173 QSet<const Context *> usedContexts;
1174 QList<const Context *> contexts;
1175
1176 QMapIterator<QString, Definition> def(m_definitions);
1177 while (def.hasNext()) {
1178 def.next();
1179 const auto &definition = def.value();
1180
1181 if (definition.firstContext) {
1182 usedContexts.insert(value: definition.firstContext);
1183 contexts.clear();
1184 contexts.append(t: definition.firstContext);
1185
1186 for (int i = 0; i < contexts.size(); ++i) {
1187 auto appendContext = [&](const ContextList &contextList) {
1188 for (auto *context : contextList) {
1189 if (!usedContexts.contains(value: context)) {
1190 contexts.append(t: context);
1191 usedContexts.insert(value: context);
1192 }
1193 }
1194 };
1195
1196 const auto *context = contexts[i];
1197 appendContext(context->lineEndContext.contexts);
1198 appendContext(context->lineEmptyContext.contexts);
1199 appendContext(context->fallthroughContext.contexts);
1200
1201 for (auto &rule : context->rules) {
1202 appendContext(rule.context.contexts);
1203 }
1204 }
1205 }
1206 }
1207
1208 return usedContexts;
1209 }
1210
1211 struct RuleAndInclude {
1212 const Context::Rule *rule;
1213 const Context::Rule *includeRules;
1214
1215 explicit operator bool() const
1216 {
1217 return rule;
1218 }
1219 };
1220
1221 struct IncludedRuleUnreachableBy {
1222 QList<RuleAndInclude> unreachableBy;
1223 bool alwaysUnreachable = true;
1224 };
1225
1226 //! Check contexts and rules
1227 bool checkContexts(const Definition &definition,
1228 QSet<ItemDatas::Style> &usedAttributeNames,
1229 QSet<ItemDatas::Style> &ignoredAttributeNames,
1230 const QSet<const Context *> &usedContexts,
1231 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1232 {
1233 bool success = true;
1234
1235 QMapIterator<QString, Context> contextIt(definition.contexts);
1236 while (contextIt.hasNext()) {
1237 contextIt.next();
1238
1239 const auto &context = contextIt.value();
1240 const auto &filename = definition.filename;
1241
1242 if (!usedContexts.contains(value: &context)) {
1243 if (!definition.generated) {
1244 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1245 success = false;
1246 }
1247 continue;
1248 }
1249
1250 if (context.name.startsWith(s: u"#pop"_sv)) {
1251 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1252 success = false;
1253 }
1254
1255 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1256 usedAttributeNames.insert(value: {.name: context.attribute, .line: context.line});
1257 }
1258
1259 success = checkContextAttribute(definition, context) && success;
1260 if (!definition.generated)
1261 success = checkUreachableRules(filename: definition.filename, context, unreachableIncludedRules) && success;
1262 success = suggestRuleMerger(filename: definition.filename, context) && success;
1263
1264 for (const auto &rule : context.rules) {
1265 if (!rule.attribute.isEmpty()) {
1266 if (rule.lookAhead != XmlBool::True) {
1267 usedAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line});
1268 } else {
1269 ignoredAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line});
1270 }
1271 }
1272 success = checkLookAhead(rule) && success;
1273 success = checkStringDetect(rule) && success;
1274 success = checkWordDetect(rule) && success;
1275 success = checkKeyword(definition, rule) && success;
1276 success = checkRegExpr(filename, rule, context) && success;
1277 if (!definition.generated)
1278 success = checkDelimiters(definition, rule) && success;
1279 }
1280 }
1281
1282 return success;
1283 }
1284
1285 //! Check that a regular expression in a RegExpr rule:
1286 //! - isValid()
1287 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1288 //! - dynamic=true but no place holder used?
1289 //! - is not . with lookAhead="1"
1290 //! - is not ^... without column ou firstNonSpace attribute
1291 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1292 //! - has no unused captures
1293 //! - has no unnecessary quantifier with lookAhead
1294 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1295 {
1296 // ignore empty regex because the error is raised during xml parsing
1297 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1298 const QRegularExpression regexp(rule.string);
1299 if (!checkRegularExpression(filename: rule.filename, regexp, line: rule.line)) {
1300 return false;
1301 }
1302
1303 // dynamic == true and no place holder?
1304 if (rule.dynamic == XmlBool::True) {
1305 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1306 if (!rule.string.contains(re: placeHolder)) {
1307 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1308 return false;
1309 }
1310 }
1311
1312 if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(s: u".*$"_sv) || rule.string.endsWith(s: u".*"_sv)) && -1 == rule.string.indexOf(ch: u'|')) {
1313 qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string;
1314 return false;
1315 }
1316
1317 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1318 if (rule.lookAhead == XmlBool::True) {
1319 static const QRegularExpression removeAllSuffix(QStringLiteral(
1320 R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1321 reg.replace(re: removeAllSuffix, after: QString());
1322 }
1323
1324 reg.replace(QStringLiteral("{1}"), after: QString());
1325 reg.replace(QStringLiteral("{1,1}"), after: QString());
1326
1327 // is DetectSpaces
1328 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1329 static const QRegularExpression isDetectSpaces(
1330 QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1331 if (rule.string.contains(re: isDetectSpaces)) {
1332 char const *extraMsg = rule.string.contains(c: u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1333 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1334 << rule.string;
1335 return false;
1336 }
1337
1338#define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1339#define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1340
1341 // is RangeDetect
1342 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1343 "\\.\\*[?+]?" REG_CHAR "|"
1344 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1345 ")$"));
1346 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(s: u".*?"_sv) || rule.string.contains(s: u"[^"_sv))
1347 && reg.contains(re: isRange)) {
1348 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1349 return false;
1350 }
1351
1352 // is AnyChar
1353 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1354 if (rule.string.contains(re: isAnyChar)) {
1355 auto extra = (reg[0] == u'^' || reg[1] == u'^') ? "with column=\"0\"" : "";
1356 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1357 return false;
1358 }
1359
1360 // is LineContinue
1361 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1362 if (reg.contains(re: isLineContinue)) {
1363 auto extra = (reg[0] == u'^') ? "with column=\"0\"" : "";
1364 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1365 return false;
1366 }
1367
1368#define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))"
1369#define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])"
1370#define REG_DOT uR"((\\[.]|\[.\]))"
1371 // is Int, check \b[0-9]+
1372 static const QRegularExpression isInt(uR"(^(\((\?:)?)*\\b(\((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s);
1373 if (reg.contains(re: isInt)) {
1374 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string;
1375 return false;
1376 }
1377
1378 // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?
1379 static const QRegularExpression isFloat(
1380 uR"(^(\\b|\((\?:)?)*)" REG_DIGITS REG_DOT
1381 REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s);
1382 if (reg.contains(re: isFloat)) {
1383 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string;
1384 return false;
1385 }
1386#undef REG_DOT
1387#undef REG_DIGIT
1388#undef REG_DIGITS
1389
1390 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1391 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1392 reg.replace(re: sanitize1, QStringLiteral("_"));
1393
1394#undef REG_CHAR
1395#undef REG_ESCAPE_CHAR
1396
1397 // use minimal or lazy operator
1398 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1399 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1400
1401 auto hasDynamicRule = [](const ContextList &contexts) {
1402 for (auto *context : contexts) {
1403 if (context->hasDynamicRule) {
1404 return true;
1405 }
1406 }
1407 return false;
1408 };
1409
1410 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: isMinimal) && !reg.contains(re: hasNotGreedy)
1411 && (!hasDynamicRule(rule.context.contexts) || regexp.captureCount() == 0) && (reg.back() != u'$' || reg.contains(c: u'|'))) {
1412 qWarning() << rule.filename << "line" << rule.line
1413 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1414 return false;
1415 }
1416
1417 // replace [:...:] with ___
1418 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1419 reg.replace(re: sanitize2, QStringLiteral("___"));
1420
1421 // replace [ccc...], [special] with ...
1422 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1423 reg.replace(re: sanitize3, QStringLiteral("...\\1"));
1424
1425 // replace [c] with _
1426 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1427 reg.replace(re: sanitize4, QStringLiteral("_"));
1428
1429 const int len = reg.size();
1430 // replace [cC] with _
1431 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1432 reg = reg.toUpper();
1433 reg.replace(re: toInsensitive, after: QString());
1434
1435 // is StringDetect
1436 // ignore (?:, ) and {n}
1437 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)"));
1438 if (reg.contains(re: isStringDetect)) {
1439 char const *extraMsg = rule.string.contains(c: u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1440 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1441 << ":" << rule.string;
1442 if (len != reg.size()) {
1443 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1444 }
1445 return false;
1446 }
1447
1448 // column="0"
1449 if (rule.column == -1) {
1450 // ^ without |
1451 // (^sas*) -> ok
1452 // (^sa|s*) -> ko
1453 // (^(sa|s*)) -> ok
1454 auto first = std::as_const(t&: reg).begin();
1455 auto last = std::as_const(t&: reg).end();
1456 int depth = 0;
1457
1458 while (u'(' == *first) {
1459 ++depth;
1460 ++first;
1461 if (u'?' == *first || u':' == first[1]) {
1462 first += 2;
1463 }
1464 }
1465
1466 if (u'^' == *first) {
1467 const int bolDepth = depth;
1468 bool replace = true;
1469
1470 while (++first != last) {
1471 if (u'(' == *first) {
1472 ++depth;
1473 } else if (u')' == *first) {
1474 --depth;
1475 if (depth < bolDepth) {
1476 // (^a)? === (^a|) -> ko
1477 if (first + 1 != last && u"*?"_sv.contains(c: first[1])) {
1478 replace = false;
1479 break;
1480 }
1481 }
1482 } else if (u'|' == *first) {
1483 // ignore '|' within subgroup
1484 if (depth <= bolDepth) {
1485 replace = false;
1486 break;
1487 }
1488 }
1489 }
1490
1491 if (replace) {
1492 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1493 return false;
1494 }
1495 }
1496 }
1497
1498 // add ^ with column=0
1499 if (rule.column == 0 && !rule.isDotRegex) {
1500 bool hasStartOfLine = false;
1501 auto first = std::as_const(t&: reg).begin();
1502 auto last = std::as_const(t&: reg).end();
1503 for (; first != last; ++first) {
1504 if (*first == u'^') {
1505 hasStartOfLine = true;
1506 break;
1507 } else if (*first == u'(') {
1508 if (last - first >= 3 && first[1] == u'?' && first[2] == u':') {
1509 first += 2;
1510 }
1511 } else {
1512 break;
1513 }
1514 }
1515
1516 if (!hasStartOfLine) {
1517 qWarning() << rule.filename << "line" << rule.line
1518 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1519 return false;
1520 }
1521 }
1522
1523 bool useCapture = false;
1524
1525 // detection of unnecessary capture
1526 if (regexp.captureCount()) {
1527 auto maximalCapture = [](const QStringView(&referenceNames)[9], const QString &s) {
1528 int maxCapture = 9;
1529 while (maxCapture && !s.contains(s: referenceNames[maxCapture - 1])) {
1530 --maxCapture;
1531 }
1532 return maxCapture;
1533 };
1534
1535 int maxCaptureUsed = 0;
1536 // maximal dynamic reference
1537 for (auto *context : rule.context.contexts) {
1538 for (const auto &nextRule : std::as_const(t&: context->rules)) {
1539 if (nextRule.dynamic == XmlBool::True) {
1540 static const QStringView cap[]{
1541 u"%1"_sv,
1542 u"%2"_sv,
1543 u"%3"_sv,
1544 u"%4"_sv,
1545 u"%5"_sv,
1546 u"%6"_sv,
1547 u"%7"_sv,
1548 u"%8"_sv,
1549 u"%9"_sv,
1550 };
1551 int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1552 maxCaptureUsed = std::max(a: maxCaptureUsed, b: maxDynamicCapture);
1553 }
1554 }
1555 }
1556
1557 static const QStringView num1[]{
1558 u"\\1"_sv,
1559 u"\\2"_sv,
1560 u"\\3"_sv,
1561 u"\\4"_sv,
1562 u"\\5"_sv,
1563 u"\\6"_sv,
1564 u"\\7"_sv,
1565 u"\\8"_sv,
1566 u"\\9"_sv,
1567 };
1568 static const QStringView num2[]{
1569 u"\\g1"_sv,
1570 u"\\g2"_sv,
1571 u"\\g3"_sv,
1572 u"\\g4"_sv,
1573 u"\\g5"_sv,
1574 u"\\g6"_sv,
1575 u"\\g7"_sv,
1576 u"\\g8"_sv,
1577 u"\\g9"_sv,
1578 };
1579 const int maxBackReference = std::max(a: maximalCapture(num1, rule.string), b: maximalCapture(num2, rule.string));
1580
1581 const int maxCapture = std::max(a: maxCaptureUsed, b: maxBackReference);
1582
1583 if (maxCapture && regexp.captureCount() > maxCapture) {
1584 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1585 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1586 return false;
1587 }
1588
1589 useCapture = maxCapture;
1590 }
1591
1592 if (!useCapture) {
1593 // is DetectIdentifier
1594 static const QRegularExpression isDetectIdentifier(
1595 QStringLiteral(R"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1596 if (rule.string.contains(re: isDetectIdentifier)) {
1597 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1598 return false;
1599 }
1600 }
1601
1602 if (rule.isDotRegex) {
1603 // search next rule with same column or firstNonSpace
1604 int i = &rule - context.rules.data() + 1;
1605 const bool hasColumn = (rule.column != -1);
1606 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1607 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1608 for (; i < context.rules.size(); ++i) {
1609 auto &rule2 = context.rules[i];
1610 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1611 i = context.rules.size();
1612 break;
1613 }
1614
1615 const bool hasColumn2 = (rule2.column != -1);
1616 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1617 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1618 || (hasFirstNonSpace && hasFirstNonSpace2)) {
1619 break;
1620 }
1621 }
1622
1623 auto ruleFilename = (filename == rule.filename) ? QString() : u"in "_sv + rule.filename;
1624 if (i == context.rules.size()) {
1625 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1626 && rule.endRegion.isEmpty() && !useCapture) {
1627 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1628 << "should be replaced by fallthroughContext:" << rule.string;
1629 }
1630 } else {
1631 auto &nextRule = context.rules[i];
1632 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : u"in "_sv + nextRule.filename;
1633 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1634 << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1635 }
1636
1637 // unnecessary quantifier
1638 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1639 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1640 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1641 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: unnecessaryQuantifier)) {
1642 qWarning() << rule.filename << "line" << rule.line
1643 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1644 return false;
1645 }
1646 }
1647 }
1648
1649 return true;
1650 }
1651
1652 // Parse and check <emptyLine>
1653 bool parseEmptyLine(const QString &filename, const QXmlStreamReader &xml)
1654 {
1655 bool success = true;
1656
1657 QString pattern;
1658 XmlBool casesensitive{};
1659
1660 const auto attrs = xml.attributes();
1661 for (auto &attr : attrs) {
1662 Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success};
1663
1664 const bool isExtracted = parser.extractString(str&: pattern, attrName: u"regexpr"_sv) || parser.extractXmlBool(xmlBool&: casesensitive, attrName: u"casesensitive"_sv);
1665
1666 success = parser.checkIfExtracted(isExtracted);
1667 }
1668
1669 if (pattern.isEmpty()) {
1670 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1671 success = false;
1672 } else {
1673 success = checkRegularExpression(filename, regexp: QRegularExpression(pattern), line: xml.lineNumber());
1674 }
1675
1676 return success;
1677 }
1678
1679 //! Check that a regular expression:
1680 //! - isValid()
1681 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1682 bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1683 {
1684 const auto pattern = regexp.pattern();
1685
1686 // validate regexp
1687 if (!regexp.isValid()) {
1688 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1689 << regexp.patternErrorOffset();
1690 return false;
1691 }
1692
1693 // catch possible case typos: [A-z] or [a-Z]
1694 const int azOffset = std::max(a: pattern.indexOf(s: u"A-z"_sv), b: pattern.indexOf(s: u"a-Z"_sv));
1695 if (azOffset >= 0) {
1696 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1697 return false;
1698 }
1699
1700 return true;
1701 }
1702
1703 //! Check fallthrough and fallthroughContext.
1704 //! Check kateversion for stopEmptyLineContextSwitchLoop.
1705 bool checkContextAttribute(const Definition &definition, const Context &context) const
1706 {
1707 bool success = true;
1708
1709 if (!context.fallthroughContext.name.isEmpty()) {
1710 const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62};
1711 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1712 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1713 << context.name;
1714 success = false;
1715 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1716 qWarning() << definition.filename << "line" << context.line
1717 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1718 << context.name;
1719 success = false;
1720 }
1721 }
1722
1723 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) {
1724 qWarning() << definition.filename << "line" << context.line
1725 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1726 success = false;
1727 }
1728
1729 return success;
1730 }
1731
1732 //! Search for additionalDeliminator/weakDeliminator which has no effect.
1733 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1734 {
1735 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1736 return true;
1737 }
1738
1739 bool success = true;
1740
1741 if (definition.kateVersion < KateVersion{5, 79}) {
1742 qWarning() << definition.filename << "line" << rule.line
1743 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1744 success = false;
1745 }
1746
1747 for (QChar c : rule.additionalDeliminator) {
1748 if (!definition.wordDelimiters.contains(c)) {
1749 return success;
1750 }
1751 }
1752
1753 for (QChar c : rule.weakDeliminator) {
1754 if (definition.wordDelimiters.contains(c)) {
1755 return success;
1756 }
1757 }
1758
1759 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1760 return false;
1761 }
1762
1763 //! Check that keyword rule reference an existing keyword list.
1764 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1765 {
1766 if (rule.type == Context::Rule::Type::keyword) {
1767 auto it = definition.keywordsList.find(key: rule.string);
1768 if (it == definition.keywordsList.end()) {
1769 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1770 return false;
1771 }
1772 }
1773 return true;
1774 }
1775
1776 //! Search for rules with lookAhead="true" and context="#stay".
1777 //! This would cause an infinite loop.
1778 bool checkLookAhead(const Context::Rule &rule) const
1779 {
1780 if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1781 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1782 }
1783 return true;
1784 }
1785
1786 //! Check that StringDetect contains a placeHolder when dynamic="1"
1787 bool checkStringDetect(const Context::Rule &rule) const
1788 {
1789 if (rule.type == Context::Rule::Type::StringDetect) {
1790 // dynamic == true and no place holder?
1791 if (rule.dynamic == XmlBool::True) {
1792 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1793 if (!rule.string.contains(re: placeHolder)) {
1794 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1795 return false;
1796 }
1797 }
1798 }
1799 return true;
1800 }
1801
1802 //! Check that WordDetect does not contain spaces at the beginning and end of text.
1803 bool checkWordDetect(const Context::Rule &rule) const
1804 {
1805 if (rule.type == Context::Rule::Type::WordDetect) {
1806 if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) {
1807 qWarning() << rule.filename << "line" << rule.line << "contains a space at the beginning or end of the string:" << rule.string;
1808 return false;
1809 }
1810 }
1811 return true;
1812 }
1813
1814 //! Check \<include> and delimiter in a keyword list
1815 bool checkKeywordsList(const Definition &definition) const
1816 {
1817 bool success = true;
1818
1819 bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53});
1820 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1821 while (keywordsIt.hasNext()) {
1822 keywordsIt.next();
1823
1824 for (const auto &include : keywordsIt.value().items.includes) {
1825 if (includeNotSupport) {
1826 qWarning() << definition.filename << "line" << include.line
1827 << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1828 success = false;
1829 }
1830 success = checkKeywordInclude(definition, include) && success;
1831 }
1832
1833 // Check that keyword list items do not have deliminator character
1834#if 0
1835 for (const auto& keyword : keywordsIt.value().items.keywords) {
1836 for (QChar c : keyword.content) {
1837 if (definition.wordDelimiters.contains(c)) {
1838 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1839 success = false;
1840 }
1841 }
1842 }
1843#endif
1844 }
1845
1846 return success;
1847 }
1848
1849 //! Search for non-existing keyword include.
1850 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1851 {
1852 bool containsKeywordName = true;
1853 int const idx = include.content.indexOf(s: u"##"_sv);
1854 if (idx == -1) {
1855 auto it = definition.keywordsList.find(key: include.content);
1856 containsKeywordName = (it != definition.keywordsList.end());
1857 } else {
1858 auto defName = include.content.sliced(pos: idx + 2);
1859 auto listName = include.content.sliced(pos: 0, n: idx);
1860 auto it = m_definitions.find(key: defName);
1861 if (it == m_definitions.end()) {
1862 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1863 return false;
1864 }
1865 containsKeywordName = it->keywordsList.contains(key: listName);
1866 }
1867
1868 if (!containsKeywordName) {
1869 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1870 }
1871
1872 return containsKeywordName;
1873 }
1874
1875 //! Check if a rule is hidden by another
1876 //! - rule hidden by DetectChar or AnyChar
1877 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1878 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1879 //! - duplicate rule (Int, Float, keyword with same String, etc)
1880 //! - Rule hidden by a dot regex
1881 bool checkUreachableRules(const QString &filename,
1882 const Context &context,
1883 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1884 {
1885 if (context.isOnlyIncluded) {
1886 return true;
1887 }
1888
1889 struct Rule4 {
1890 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1891 {
1892 auto set = [&](RuleAndInclude &ruleAndInclude) {
1893 auto old = ruleAndInclude;
1894 ruleAndInclude = {.rule: &rule, .includeRules: includeRules};
1895 return old;
1896 };
1897
1898 if (rule.firstNonSpace == XmlBool::True) {
1899 return set(firstNonSpace);
1900 } else if (rule.column == 0) {
1901 return set(column0);
1902 } else if (rule.column > 0) {
1903 return set(columnGreaterThan0[rule.column]);
1904 } else {
1905 return set(normal);
1906 }
1907 }
1908
1909 private:
1910 RuleAndInclude normal;
1911 RuleAndInclude column0;
1912 QMap<int, RuleAndInclude> columnGreaterThan0;
1913 RuleAndInclude firstNonSpace;
1914 };
1915
1916 // Associate QChar with RuleAndInclude
1917 struct CharTable {
1918 /// Search RuleAndInclude associated with @p c.
1919 RuleAndInclude find(QChar c) const
1920 {
1921 if (c.unicode() < 128) {
1922 return m_asciiMap[c.unicode()];
1923 }
1924 auto it = m_utf8Map.find(key: c);
1925 return it == m_utf8Map.end() ? RuleAndInclude{.rule: nullptr, .includeRules: nullptr} : it.value();
1926 }
1927
1928 /// Search RuleAndInclude associated with the characters of @p s.
1929 /// \return an empty QList when at least one character is not found.
1930 QList<RuleAndInclude> find(QStringView s) const
1931 {
1932 QList<RuleAndInclude> result;
1933
1934 for (QChar c : s) {
1935 if (!find(c)) {
1936 return result;
1937 }
1938 }
1939
1940 for (QChar c : s) {
1941 result.append(t: find(c));
1942 }
1943
1944 return result;
1945 }
1946
1947 /// Associates @p c with a rule.
1948 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1949 {
1950 if (c.unicode() < 128) {
1951 m_asciiMap[c.unicode()] = {.rule: &rule, .includeRules: includeRule};
1952 } else {
1953 m_utf8Map[c] = {.rule: &rule, .includeRules: includeRule};
1954 }
1955 }
1956
1957 /// Associates each character of @p s with a rule.
1958 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1959 {
1960 for (QChar c : s) {
1961 append(c, rule, includeRule);
1962 }
1963 }
1964
1965 private:
1966 RuleAndInclude m_asciiMap[127]{};
1967 QMap<QChar, RuleAndInclude> m_utf8Map;
1968 };
1969
1970 struct Char4Tables {
1971 CharTable chars;
1972 CharTable charsColumn0;
1973 QMap<int, CharTable> charsColumnGreaterThan0;
1974 CharTable charsFirstNonSpace;
1975 };
1976
1977 // View on Char4Tables members
1978 struct CharTableArray {
1979 // Append Char4Tables members that satisfies firstNonSpace and column.
1980 // Char4Tables::char is always added.
1981 CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1982 {
1983 if (rule.firstNonSpace == XmlBool::True) {
1984 appendTable(t&: tables.charsFirstNonSpace);
1985 }
1986
1987 if (rule.column == 0) {
1988 appendTable(t&: tables.charsColumn0);
1989 } else if (rule.column > 0) {
1990 appendTable(t&: tables.charsColumnGreaterThan0[rule.column]);
1991 }
1992
1993 appendTable(t&: tables.chars);
1994 }
1995
1996 // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1997 void removeNonSpecialWhenSpecial()
1998 {
1999 if (m_size > 1) {
2000 --m_size;
2001 }
2002 }
2003
2004 /// Search RuleAndInclude associated with @p c.
2005 RuleAndInclude find(QChar c) const
2006 {
2007 for (int i = 0; i < m_size; ++i) {
2008 if (auto ruleAndInclude = m_charTables[i]->find(c)) {
2009 return ruleAndInclude;
2010 }
2011 }
2012 return RuleAndInclude{.rule: nullptr, .includeRules: nullptr};
2013 }
2014
2015 /// Search RuleAndInclude associated with the characters of @p s.
2016 /// \return an empty QList when at least one character is not found.
2017 QList<RuleAndInclude> find(QStringView s) const
2018 {
2019 for (int i = 0; i < m_size; ++i) {
2020 auto result = m_charTables[i]->find(s);
2021 if (result.size()) {
2022 while (++i < m_size) {
2023 result.append(other: m_charTables[i]->find(s));
2024 }
2025 return result;
2026 }
2027 }
2028 return QList<RuleAndInclude>();
2029 }
2030
2031 /// Associates @p c with a rule.
2032 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2033 {
2034 for (int i = 0; i < m_size; ++i) {
2035 m_charTables[i]->append(c, rule, includeRule);
2036 }
2037 }
2038
2039 /// Associates each character of @p s with a rule.
2040 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
2041 {
2042 for (int i = 0; i < m_size; ++i) {
2043 m_charTables[i]->append(s, rule, includeRule);
2044 }
2045 }
2046
2047 private:
2048 void appendTable(CharTable &t)
2049 {
2050 m_charTables[m_size] = &t;
2051 ++m_size;
2052 }
2053
2054 CharTable *m_charTables[3];
2055 int m_size = 0;
2056 };
2057
2058 struct ObservableRule {
2059 const Context::Rule *rule;
2060 const Context::Rule *includeRules;
2061
2062 bool hasResolvedIncludeRules() const
2063 {
2064 return rule == includeRules;
2065 }
2066 };
2067
2068 // Iterates over all the rules, including those in includedRules
2069 struct RuleIterator {
2070 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
2071 : m_end(&endRule - rules.data())
2072 , m_rules(rules)
2073 {
2074 }
2075
2076 /// \return next rule or nullptr
2077 const Context::Rule *next()
2078 {
2079 // if in includedRules
2080 if (m_includedRules) {
2081 ++m_i2;
2082 if (m_i2 != m_includedRules->size()) {
2083 return (*m_includedRules)[m_i2];
2084 }
2085 ++m_i;
2086 m_includedRules = nullptr;
2087 }
2088
2089 // if is a includedRules
2090 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
2091 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
2092 m_i2 = 0;
2093 m_includedRules = &m_rules[m_i].rule->includedRules;
2094 return (*m_includedRules)[m_i2];
2095 }
2096 ++m_i;
2097 }
2098
2099 if (m_i < m_end) {
2100 ++m_i;
2101 return m_rules[m_i - 1].rule;
2102 }
2103
2104 return nullptr;
2105 }
2106
2107 /// \return current IncludeRules or nullptr
2108 const Context::Rule *currentIncludeRules() const
2109 {
2110 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
2111 }
2112
2113 private:
2114 int m_i = 0;
2115 int m_i2 = 0;
2116 const int m_end;
2117 const QList<ObservableRule> &m_rules;
2118 const QList<const Context::Rule *> *m_includedRules = nullptr;
2119 };
2120
2121 // Dot regex container that satisfies firstNonSpace and column.
2122 struct DotRegex {
2123 /// Append a dot regex rule.
2124 void append(const Context::Rule &rule, const Context::Rule *includedRule)
2125 {
2126 auto array = extractDotRegexes(rule);
2127 if (array[0]) {
2128 *array[0] = {.rule: &rule, .includeRules: includedRule};
2129 }
2130 if (array[1]) {
2131 *array[1] = {.rule: &rule, .includeRules: includedRule};
2132 }
2133 }
2134
2135 /// Search dot regex which hides @p rule
2136 RuleAndInclude find(const Context::Rule &rule)
2137 {
2138 auto array = extractDotRegexes(rule);
2139 if (array[0]) {
2140 return *array[0];
2141 }
2142 if (array[1]) {
2143 return *array[1];
2144 }
2145 return RuleAndInclude{};
2146 }
2147
2148 private:
2149 using Array = std::array<RuleAndInclude *, 2>;
2150
2151 Array extractDotRegexes(const Context::Rule &rule)
2152 {
2153 Array ret{};
2154
2155 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2156 ret[0] = &dotRegex;
2157 } else {
2158 if (rule.firstNonSpace == XmlBool::True) {
2159 ret[0] = &dotRegexFirstNonSpace;
2160 }
2161
2162 if (rule.column == 0) {
2163 ret[1] = &dotRegexColumn0;
2164 } else if (rule.column > 0) {
2165 ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2166 }
2167 }
2168
2169 return ret;
2170 }
2171
2172 RuleAndInclude dotRegex{};
2173 RuleAndInclude dotRegexColumn0{};
2174 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2175 RuleAndInclude dotRegexFirstNonSpace{};
2176 };
2177
2178 bool success = true;
2179
2180 // characters of DetectChar/AnyChar
2181 Char4Tables detectChars;
2182 // characters of dynamic DetectChar
2183 Char4Tables dynamicDetectChars;
2184 // characters of LineContinue
2185 Char4Tables lineContinueChars;
2186
2187 Rule4 intRule{};
2188 Rule4 floatRule{};
2189 Rule4 hlCCharRule{};
2190 Rule4 hlCOctRule{};
2191 Rule4 hlCHexRule{};
2192 Rule4 hlCStringCharRule{};
2193 Rule4 detectIdentifierRule{};
2194
2195 // Contains includedRules and included includedRules
2196 QMap<Context const *, RuleAndInclude> includeContexts;
2197
2198 DotRegex dotRegex;
2199
2200 QList<ObservableRule> observedRules;
2201 observedRules.reserve(asize: context.rules.size());
2202 for (const Context::Rule &rule : context.rules) {
2203 const Context::Rule *includeRule = nullptr;
2204 if (rule.type == Context::Rule::Type::IncludeRules && !rule.context.contexts.isEmpty()) {
2205 if (rule.context.contexts.front()->isOnlyIncluded) {
2206 includeRule = &rule;
2207 }
2208 }
2209
2210 observedRules.push_back(t: {.rule: &rule, .includeRules: includeRule});
2211 if (includeRule) {
2212 for (const Context::Rule *rule2 : rule.includedRules) {
2213 observedRules.push_back(t: {.rule: rule2, .includeRules: includeRule});
2214 }
2215 }
2216 }
2217
2218 for (auto &observedRule : observedRules) {
2219 const Context::Rule &rule = *observedRule.rule;
2220 bool isUnreachable = false;
2221 QList<RuleAndInclude> unreachableBy;
2222
2223 // declare rule as unreachable if ruleAndInclude is not empty
2224 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2225 if (ruleAndInclude) {
2226 isUnreachable = true;
2227 unreachableBy.append(t: ruleAndInclude);
2228 }
2229 };
2230
2231 // declare rule as unreachable if ruleAndIncludes is not empty
2232 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2233 if (!ruleAndIncludes.isEmpty()) {
2234 isUnreachable = true;
2235 unreachableBy.append(l: ruleAndIncludes);
2236 }
2237 };
2238
2239 // check if rule2.firstNonSpace/column is compatible with those of rule
2240 auto isCompatible = [&rule](Context::Rule const &rule2) {
2241 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2242 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2243 };
2244
2245 updateUnreachable1(dotRegex.find(rule));
2246
2247 switch (rule.type) {
2248 // checks if hidden by DetectChar/AnyChar
2249 // then add the characters to detectChars
2250 case Context::Rule::Type::AnyChar: {
2251 auto tables = CharTableArray(detectChars, rule);
2252 updateUnreachable2(tables.find(s: rule.string));
2253 tables.removeNonSpecialWhenSpecial();
2254 tables.append(s: rule.string, rule);
2255 break;
2256 }
2257
2258 // check if is hidden by DetectChar/AnyChar
2259 // then add the characters to detectChars or dynamicDetectChars
2260 case Context::Rule::Type::DetectChar: {
2261 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2262 auto tables = CharTableArray(chars4, rule);
2263 updateUnreachable1(tables.find(c: rule.char0));
2264 tables.removeNonSpecialWhenSpecial();
2265 tables.append(c: rule.char0, rule);
2266 break;
2267 }
2268
2269 // check if hidden by DetectChar/AnyChar
2270 // then add spaces characters to detectChars
2271 case Context::Rule::Type::DetectSpaces: {
2272 auto tables = CharTableArray(detectChars, rule);
2273 updateUnreachable2(tables.find(s: u" \t"_sv));
2274 tables.removeNonSpecialWhenSpecial();
2275 tables.append(c: u' ', rule);
2276 tables.append(c: u'\t', rule);
2277 break;
2278 }
2279
2280 // check if hidden by DetectChar/AnyChar
2281 case Context::Rule::Type::HlCChar:
2282 updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'\''));
2283 updateUnreachable1(hlCCharRule.setRule(rule));
2284 break;
2285
2286 // check if hidden by DetectChar/AnyChar
2287 case Context::Rule::Type::HlCHex:
2288 updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'0'));
2289 updateUnreachable1(hlCHexRule.setRule(rule));
2290 break;
2291
2292 // check if hidden by DetectChar/AnyChar
2293 case Context::Rule::Type::HlCOct:
2294 updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'0'));
2295 updateUnreachable1(hlCOctRule.setRule(rule));
2296 break;
2297
2298 // check if hidden by DetectChar/AnyChar
2299 case Context::Rule::Type::HlCStringChar:
2300 updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'\\'));
2301 updateUnreachable1(hlCStringCharRule.setRule(rule));
2302 break;
2303
2304 // check if hidden by DetectChar/AnyChar
2305 case Context::Rule::Type::Int:
2306 updateUnreachable2(CharTableArray(detectChars, rule).find(s: u"0123456789"_sv));
2307 updateUnreachable1(intRule.setRule(rule));
2308 break;
2309
2310 // check if hidden by DetectChar/AnyChar
2311 case Context::Rule::Type::Float:
2312 updateUnreachable2(CharTableArray(detectChars, rule).find(s: u"0123456789."_sv));
2313 updateUnreachable1(floatRule.setRule(rule));
2314 // check that Float is before Int
2315 updateUnreachable1(Rule4(intRule).setRule(rule));
2316 break;
2317
2318 // check if hidden by another DetectIdentifier rule
2319 case Context::Rule::Type::DetectIdentifier:
2320 updateUnreachable1(detectIdentifierRule.setRule(rule));
2321 break;
2322
2323 // check if hidden by DetectChar/AnyChar or another LineContinue
2324 case Context::Rule::Type::LineContinue: {
2325 updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0));
2326
2327 auto tables = CharTableArray(lineContinueChars, rule);
2328 updateUnreachable1(tables.find(c: rule.char0));
2329 tables.removeNonSpecialWhenSpecial();
2330 tables.append(c: rule.char0, rule);
2331 break;
2332 }
2333
2334 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2335 case Context::Rule::Type::Detect2Chars:
2336 case Context::Rule::Type::RangeDetect:
2337 updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0));
2338 if (!isUnreachable) {
2339 RuleIterator ruleIterator(observedRules, observedRule);
2340 while (const auto *rulePtr = ruleIterator.next()) {
2341 if (isUnreachable) {
2342 break;
2343 }
2344 const auto &rule2 = *rulePtr;
2345 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2346 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2347 }
2348 }
2349 }
2350 break;
2351
2352 case Context::Rule::Type::RegExpr: {
2353 if (rule.isDotRegex) {
2354 dotRegex.append(rule, includedRule: nullptr);
2355 break;
2356 }
2357
2358 // check that `rule` does not have another RegExpr as a prefix
2359 RuleIterator ruleIterator(observedRules, observedRule);
2360 while (const auto *rulePtr = ruleIterator.next()) {
2361 if (isUnreachable) {
2362 break;
2363 }
2364 const auto &rule2 = *rulePtr;
2365 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2366 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(s: rule2.sanitizedString)) {
2367 bool add = (rule.sanitizedString.startsWith(s: rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2368 if (!add) {
2369 // \s.* (sanitized = \s) is considered hiding \s*\S
2370 // we check the quantifiers to see if this is the case
2371 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2372 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2373 auto c3 = rule2.sanitizedString.back().unicode();
2374 if (c3 == '*' || c3 == '?' || c3 == '+') {
2375 add = true;
2376 } else if (c1 == '*' || c1 == '?') {
2377 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2378 } else {
2379 add = true;
2380 }
2381 }
2382 if (add) {
2383 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2384 }
2385 }
2386 }
2387
2388 Q_FALLTHROUGH();
2389 }
2390 // check if a rule does not have another rule as a prefix
2391 case Context::Rule::Type::WordDetect:
2392 case Context::Rule::Type::StringDetect: {
2393 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2394 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2395 RuleIterator ruleIterator(observedRules, observedRule);
2396 while (const auto *rulePtr = ruleIterator.next()) {
2397 if (isUnreachable) {
2398 break;
2399 }
2400
2401 const auto &rule2 = *rulePtr;
2402 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2403 continue;
2404 }
2405
2406 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2407 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2408 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(s: rule2.string, cs: caseSensitivity)) {
2409 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2410 }
2411 }
2412 }
2413
2414 // string used for comparison and truncated from "dynamic" part
2415 QStringView s = rule.string;
2416
2417 // truncate to '%' with dynamic rules
2418 if (rule.dynamic == XmlBool::True) {
2419 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2420 auto result = dynamicPosition.match(subject: rule.string);
2421 s = s.sliced(pos: 0, n: result.capturedLength());
2422 // check if hidden by DetectChar/AnyChar
2423 if (s.size() + 2 <= rule.string.size()) {
2424 auto tables = CharTableArray(dynamicDetectChars, rule);
2425 updateUnreachable1(tables.find(c: s.data()[s.size() + 2]));
2426 }
2427 }
2428
2429 QString sanitizedRegex;
2430 // truncate to special character with RegExpr.
2431 // If regexp contains '|', `s` becomes empty.
2432 if (rule.type == Context::Rule::Type::RegExpr) {
2433 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2434 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2435 const qsizetype result = regularChars.match(subject: rule.string).capturedLength();
2436 const qsizetype pos = qMin(a: result, b: s.size());
2437 if (rule.string.indexOf(ch: u'|', from: pos) < pos) {
2438 sanitizedRegex = rule.string.sliced(pos: 0, n: qMin(a: result, b: s.size()));
2439 sanitizedRegex.replace(re: sanitizeChars, QStringLiteral("\\1"));
2440 s = sanitizedRegex;
2441 } else {
2442 s = QStringView();
2443 }
2444 }
2445
2446 // check if hidden by DetectChar/AnyChar
2447 if (s.size() > 0) {
2448 auto t = CharTableArray(detectChars, rule);
2449 if (rule.insensitive != XmlBool::True) {
2450 updateUnreachable1(t.find(c: s[0]));
2451 } else {
2452 QChar c2[]{s[0].toLower(), s[0].toUpper()};
2453 updateUnreachable2(t.find(s: QStringView(c2, 2)));
2454 }
2455
2456 // StringDetect is a DetectChar
2457 if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) {
2458 auto tables = CharTableArray(detectChars, rule);
2459 auto c = rule.string[0];
2460 if (rule.insensitive != XmlBool::True) {
2461 c = c.toLower();
2462 tables.removeNonSpecialWhenSpecial();
2463 tables.append(c, rule);
2464 c = c.toUpper();
2465 }
2466 tables.removeNonSpecialWhenSpecial();
2467 tables.append(c, rule);
2468 }
2469 }
2470
2471 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2472 if (s.size() > 0 && !isUnreachable) {
2473 // combination of uppercase and lowercase
2474 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2475
2476 RuleIterator ruleIterator(observedRules, observedRule);
2477 while (const auto *rulePtr = ruleIterator.next()) {
2478 if (isUnreachable) {
2479 break;
2480 }
2481 const auto &rule2 = *rulePtr;
2482 const bool isSensitive = (rule2.insensitive == XmlBool::True);
2483 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2484
2485 switch (rule2.type) {
2486 // check that it is not a detectChars prefix
2487 case Context::Rule::Type::Detect2Chars:
2488 if (isCompatible(rule2) && s.size() >= 2) {
2489 if (rule.insensitive != XmlBool::True) {
2490 if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2491 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2492 }
2493 } else {
2494 // when the string is case insensitive,
2495 // all 4 upper/lower case combinations must be found
2496 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2497 if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2498 x = {.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()};
2499 }
2500 };
2501 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2502 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2503 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2504 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2505
2506 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2507 && detect2CharsInsensitives[3]) {
2508 isUnreachable = true;
2509 unreachableBy.append(t: detect2CharsInsensitives[0]);
2510 unreachableBy.append(t: detect2CharsInsensitives[1]);
2511 unreachableBy.append(t: detect2CharsInsensitives[2]);
2512 unreachableBy.append(t: detect2CharsInsensitives[3]);
2513 }
2514 }
2515 }
2516 break;
2517
2518 // check that it is not a StringDetect prefix
2519 case Context::Rule::Type::StringDetect:
2520 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2521 && s.startsWith(s: rule2.string, cs: caseSensitivity)) {
2522 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2523 }
2524 break;
2525
2526 // check if a WordDetect is hidden by another WordDetect
2527 case Context::Rule::Type::WordDetect:
2528 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2529 && 0 == rule.string.compare(s: rule2.string, cs: caseSensitivity)) {
2530 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2531 }
2532 break;
2533
2534 default:;
2535 }
2536 }
2537 }
2538
2539 break;
2540 }
2541
2542 // check if hidden by another keyword rule
2543 case Context::Rule::Type::keyword: {
2544 RuleIterator ruleIterator(observedRules, observedRule);
2545 while (const auto *rulePtr = ruleIterator.next()) {
2546 if (isUnreachable) {
2547 break;
2548 }
2549 const auto &rule2 = *rulePtr;
2550 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2551 updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()});
2552 }
2553 }
2554 // TODO check that all keywords are hidden by another rules
2555 break;
2556 }
2557
2558 // add characters in those used but without checking if they are already.
2559 // <DetectChar char="}" />
2560 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2561 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2562 // <DetectChar char="{" /> <- hidden by previous rule
2563 case Context::Rule::Type::IncludeRules:
2564 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2565 break;
2566 }
2567
2568 if (rule.context.contexts.isEmpty()) {
2569 break;
2570 }
2571
2572 if (auto &ruleAndInclude = includeContexts[rule.context.contexts.front()]) {
2573 updateUnreachable1(ruleAndInclude);
2574 } else {
2575 ruleAndInclude.rule = &rule;
2576 }
2577
2578 for (const auto *rulePtr : rule.includedIncludeRules) {
2579 for (auto *context : rulePtr->context.contexts) {
2580 includeContexts.insert(key: context, value: RuleAndInclude{.rule: rulePtr, .includeRules: &rule});
2581 }
2582 }
2583
2584 if (observedRule.includeRules) {
2585 break;
2586 }
2587
2588 for (const auto *rulePtr : rule.includedRules) {
2589 const auto &rule2 = *rulePtr;
2590 switch (rule2.type) {
2591 case Context::Rule::Type::AnyChar: {
2592 auto tables = CharTableArray(detectChars, rule2);
2593 tables.removeNonSpecialWhenSpecial();
2594 tables.append(s: rule2.string, rule: rule2, includeRule: &rule);
2595 break;
2596 }
2597
2598 case Context::Rule::Type::DetectChar: {
2599 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2600 auto tables = CharTableArray(chars4, rule2);
2601 tables.removeNonSpecialWhenSpecial();
2602 tables.append(c: rule2.char0, rule: rule2, includeRule: &rule);
2603 break;
2604 }
2605
2606 case Context::Rule::Type::DetectSpaces: {
2607 auto tables = CharTableArray(detectChars, rule2);
2608 tables.removeNonSpecialWhenSpecial();
2609 tables.append(c: u' ', rule: rule2, includeRule: &rule);
2610 tables.append(c: u'\t', rule: rule2, includeRule: &rule);
2611 break;
2612 }
2613
2614 case Context::Rule::Type::HlCChar:
2615 hlCCharRule.setRule(rule: rule2, includeRules: &rule);
2616 break;
2617
2618 case Context::Rule::Type::HlCHex:
2619 hlCHexRule.setRule(rule: rule2, includeRules: &rule);
2620 break;
2621
2622 case Context::Rule::Type::HlCOct:
2623 hlCOctRule.setRule(rule: rule2, includeRules: &rule);
2624 break;
2625
2626 case Context::Rule::Type::HlCStringChar:
2627 hlCStringCharRule.setRule(rule: rule2, includeRules: &rule);
2628 break;
2629
2630 case Context::Rule::Type::Int:
2631 intRule.setRule(rule: rule2, includeRules: &rule);
2632 break;
2633
2634 case Context::Rule::Type::Float:
2635 floatRule.setRule(rule: rule2, includeRules: &rule);
2636 break;
2637
2638 case Context::Rule::Type::LineContinue: {
2639 auto tables = CharTableArray(lineContinueChars, rule2);
2640 tables.removeNonSpecialWhenSpecial();
2641 tables.append(c: rule2.char0, rule: rule2, includeRule: &rule);
2642 break;
2643 }
2644
2645 case Context::Rule::Type::RegExpr:
2646 if (rule2.isDotRegex) {
2647 dotRegex.append(rule: rule2, includedRule: &rule);
2648 }
2649 break;
2650
2651 case Context::Rule::Type::StringDetect: {
2652 // StringDetect is a DetectChar
2653 if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) {
2654 auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2655 auto tables = CharTableArray(chars4, rule2);
2656 tables.removeNonSpecialWhenSpecial();
2657 tables.append(c: rule2.string.back(), rule: rule2, includeRule: &rule);
2658 }
2659 break;
2660 }
2661
2662 case Context::Rule::Type::WordDetect:
2663 case Context::Rule::Type::Detect2Chars:
2664 case Context::Rule::Type::IncludeRules:
2665 case Context::Rule::Type::DetectIdentifier:
2666 case Context::Rule::Type::keyword:
2667 case Context::Rule::Type::Unknown:
2668 case Context::Rule::Type::RangeDetect:
2669 break;
2670 }
2671 }
2672 break;
2673
2674 case Context::Rule::Type::Unknown:
2675 break;
2676 }
2677
2678 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2679 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2680 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2681 unreachableIncludedRule.unreachableBy.append(l: unreachableBy);
2682 } else {
2683 unreachableIncludedRule.alwaysUnreachable = false;
2684 }
2685 } else if (isUnreachable) {
2686 success = false;
2687 QString message;
2688 message.reserve(asize: 128);
2689 for (auto &ruleAndInclude : std::as_const(t&: unreachableBy)) {
2690 message += u"line "_sv;
2691 if (ruleAndInclude.includeRules) {
2692 message += QString::number(ruleAndInclude.includeRules->line);
2693 message += u" [by '"_sv;
2694 message += ruleAndInclude.includeRules->context.name;
2695 message += u"' line "_sv;
2696 message += QString::number(ruleAndInclude.rule->line);
2697 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2698 message += u" ("_sv;
2699 message += ruleAndInclude.rule->filename;
2700 message += u')';
2701 }
2702 message += u']';
2703 } else {
2704 message += QString::number(ruleAndInclude.rule->line);
2705 }
2706 message += u", "_sv;
2707 }
2708 message.chop(n: 2);
2709 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2710 }
2711 }
2712
2713 return success;
2714 }
2715
2716 //! Proposes to merge certain rule sequences
2717 //! - several DetectChar/AnyChar into AnyChar
2718 //! - several RegExpr into one RegExpr
2719 bool suggestRuleMerger(const QString &filename, const Context &context) const
2720 {
2721 bool success = true;
2722
2723 if (context.rules.isEmpty()) {
2724 return success;
2725 }
2726
2727 auto it = context.rules.begin();
2728 const auto end = context.rules.end() - 1;
2729
2730 for (; it < end; ++it) {
2731 const auto &rule1 = *it;
2732 const auto &rule2 = it[1];
2733
2734 auto isCommonCompatible = [&] {
2735 if (rule1.lookAhead != rule2.lookAhead) {
2736 return false;
2737 }
2738 // ignore attribute when lookAhead is true
2739 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2740 return false;
2741 }
2742 // clang-format off
2743 return rule1.beginRegion == rule2.beginRegion
2744 && rule1.endRegion == rule2.endRegion
2745 && rule1.firstNonSpace == rule2.firstNonSpace
2746 && rule1.context.contexts == rule2.context.contexts
2747 && rule1.context.popCount == rule2.context.popCount;
2748 // clang-format on
2749 };
2750
2751 switch (rule1.type) {
2752 // request to merge StringDetect with AnyChar
2753 case Context::Rule::Type::StringDetect:
2754 if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) {
2755 break;
2756 }
2757 Q_FALLTHROUGH();
2758 // request to merge AnyChar/DetectChar
2759 case Context::Rule::Type::AnyChar:
2760 case Context::Rule::Type::DetectChar:
2761 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar
2762 || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1))
2763 && isCommonCompatible() && rule1.column == rule2.column) {
2764 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2765 success = false;
2766 }
2767 break;
2768
2769 // request to merge multiple RegExpr
2770 case Context::Rule::Type::RegExpr:
2771 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2772 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2773 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2774 success = false;
2775 }
2776 break;
2777
2778 case Context::Rule::Type::DetectSpaces:
2779 case Context::Rule::Type::HlCChar:
2780 case Context::Rule::Type::HlCHex:
2781 case Context::Rule::Type::HlCOct:
2782 case Context::Rule::Type::HlCStringChar:
2783 case Context::Rule::Type::Int:
2784 case Context::Rule::Type::Float:
2785 case Context::Rule::Type::LineContinue:
2786 case Context::Rule::Type::WordDetect:
2787 case Context::Rule::Type::Detect2Chars:
2788 case Context::Rule::Type::IncludeRules:
2789 case Context::Rule::Type::DetectIdentifier:
2790 case Context::Rule::Type::keyword:
2791 case Context::Rule::Type::Unknown:
2792 case Context::Rule::Type::RangeDetect:
2793 break;
2794 }
2795 }
2796
2797 return success;
2798 }
2799
2800 //! Initialize the referenced context (ContextName::context)
2801 //! Some input / output examples are:
2802 //! - "#stay" -> ""
2803 //! - "#pop" -> ""
2804 //! - "Comment" -> "Comment"
2805 //! - "#pop!Comment" -> "Comment"
2806 //! - "##ISO C++" -> ""
2807 //! - "Comment##ISO C++"-> "Comment" in ISO C++
2808 void resolveContextName(Definition &definition, ContextName &contextName, int line, const char *attrName)
2809 {
2810 QStringView name = contextName.name;
2811 if (name.isEmpty()) {
2812 contextName.stay = true;
2813 } else if (name.startsWith(s: u"#stay"_sv)) {
2814 contextName.stay = true;
2815 if (name.size() > 5) {
2816 qWarning() << definition.filename << "line" << line << "invalid context in " << attrName << "=" << contextName.name;
2817 m_success = false;
2818 }
2819 } else {
2820 while (name.startsWith(s: u"#pop"_sv)) {
2821 name = name.sliced(pos: 4);
2822 ++contextName.popCount;
2823 }
2824
2825 if (contextName.popCount && !name.isEmpty()) {
2826 if (name.startsWith(c: u'!') && name.size() > 1) {
2827 name = name.sliced(pos: 1);
2828 } else {
2829 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name in " << attrName << "="
2830 << contextName.name;
2831 m_success = false;
2832 }
2833 }
2834
2835 if (!name.isEmpty()) {
2836 for (auto contextPart : QStringTokenizer{name, u'!'}) {
2837 auto *ctx = resolveContextPartName(definition, contextNamePart: contextPart, line, attrName, originalContext: contextName.name);
2838 if (ctx) {
2839 contextName.contexts.push_back(t: ctx);
2840 }
2841 }
2842
2843 if (contextName.contexts.size() > 1 && definition.kateVersion < KateVersion{6, 22}) {
2844 qWarning() << definition.filename << "line" << line
2845 << "multiple contexts are only available since version \"6.21\". Please, increase kateversion.";
2846 m_success = false;
2847 }
2848 }
2849 }
2850 }
2851
2852 Context *resolveContextPartName(Definition &definition, QStringView contextNamePart, int line, const char *attrName, QStringView originalContext)
2853 {
2854 auto originalContextNamePart = contextNamePart;
2855 auto *contextMap = &definition.contexts;
2856 const int idx = contextNamePart.indexOf(s: u"##"_sv);
2857 if (idx != -1) {
2858 auto defName = contextNamePart.sliced(pos: idx + 2);
2859 auto it = m_definitions.find(key: defName.toString());
2860 if (it == m_definitions.end()) {
2861 qWarning() << definition.filename << "line" << line << "unknown definition" << defName << "in" << attrName << "=" << originalContext;
2862 m_success = false;
2863 return nullptr;
2864 }
2865 contextMap = &it->contexts;
2866 definition.referencedDefinitions.insert(value: &*it);
2867 contextNamePart = contextNamePart.sliced(pos: 0, n: idx);
2868 if (contextNamePart.isEmpty()) {
2869 contextNamePart = it->firstContextName;
2870 }
2871 }
2872
2873 auto ctxIt = contextMap->find(key: contextNamePart.toString());
2874 if (ctxIt != contextMap->end()) {
2875 return &*ctxIt;
2876 }
2877
2878 qWarning() << definition.filename << "line" << line << "unknown context" << originalContextNamePart << "in " << attrName << "=" << originalContext;
2879 m_success = false;
2880 return nullptr;
2881 }
2882
2883 QMap<QString, Definition> m_definitions;
2884 QHash<QString, QString> m_names;
2885 Definition *m_currentDefinition = nullptr;
2886 Keywords *m_currentKeywords = nullptr;
2887 Context *m_currentContext = nullptr;
2888 // xml reader variable
2889 //@{
2890 QString m_textContent;
2891 bool m_inKeywordItem = false;
2892 //@}
2893 bool m_success = true;
2894};
2895
2896class HlCompressor
2897{
2898public:
2899 HlCompressor(const QString &kateVersion)
2900 : m_kateVersion(kateVersion)
2901 {
2902 m_hasElems.push_back(t: true);
2903 }
2904
2905 const QString &compressedXML() const
2906 {
2907 return m_data;
2908 }
2909
2910 /**
2911 * Reduce xml space by removing what is superfluous.
2912 * - transforms boolean values into 0 or 1.
2913 * - remove unused attributes.
2914 * - remove spaces and comments.
2915 * - remove context attributes referring to #stay (because this is the default).
2916 * - replace Detect2Chars with StringDetect (String="xy" is shorter than char="x" char1="y").
2917 * - sort contexts by frequency of use to accelerate their search during loading.
2918 */
2919 void processElement(const QXmlStreamReader &xml)
2920 {
2921 switch (xml.tokenType()) {
2922 case QXmlStreamReader::StartElement: {
2923 closePreviousOpenTag(out&: m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data);
2924 m_hasElems.push_back(t: false);
2925
2926 const auto tagName = xml.name();
2927 if (tagName == u"contexts"_sv) {
2928 m_inContexts = true;
2929 m_data += u"<contexts"_sv;
2930 } else if (m_inContexts) {
2931 Context &ctx = (m_contexts.empty() || tagName == u"context"_sv) ? m_contexts.emplace_back() : m_contexts.back();
2932 QString &out = ctx.data;
2933 const bool isDetect2Chars = tagName == u"Detect2Chars"_sv;
2934 out += u'<' % (isDetect2Chars ? u"StringDetect"_sv : tagName);
2935
2936 auto attrs = xml.attributes();
2937 sortAttributes(attrs);
2938 for (const auto &attr : attrs) {
2939 const auto attrName = attr.name();
2940 auto value = attr.value();
2941 // transform Detect2Chars char and char1 attributes to StringDetect String attribute
2942 if (isDetect2Chars && (attrName == u"char"_sv || attrName == u"char1"_sv)) {
2943 if (attrName == u"char"_sv) {
2944 const auto ch0 = value;
2945 const auto ch1 = attrs.value(qualifiedName: u"char1"_sv);
2946 QChar chars[]{ch0.isEmpty() ? u' ' : ch0[0], ch1.isEmpty() ? u' ' : ch1[0]};
2947 writeXmlAttribute(out, attrName: u"String"_sv, value: QStringView(chars, 2), tagName);
2948 }
2949 } else if (attrName == u"context"_sv || attrName == u"lineEndContext"_sv || attrName == u"fallthroughContext"_sv
2950 || attrName == u"lineEmptyContext"_sv) {
2951 // ignore #stay context because this is the default
2952 if (value != u"#stay"_sv) {
2953 writeXmlAttribute(out, attrName, value, tagName);
2954
2955 /*
2956 * Extract context name and increment context counter
2957 */
2958 bool hasPop = false;
2959 while (value.startsWith(s: u"#pop"_sv)) {
2960 hasPop = true;
2961 value = value.sliced(pos: 4);
2962 }
2963 if (hasPop && !value.isEmpty()) {
2964 value = value.sliced(pos: 1);
2965 }
2966 if (!value.isEmpty() && -1 == value.indexOf(s: u"##"_sv)) {
2967 m_contextRefs[value.toString()]++;
2968 }
2969 }
2970 } else if (tagName == u"LineContinue"_sv && attrName == u"char"_sv && value == u"\\") {
2971 // ignore char="\\" with LineContinue
2972 } else {
2973 if (attrName == u"name"_sv) {
2974 ctx.name = value.toString();
2975 }
2976 writeXmlAttribute(out, attrName, value, tagName);
2977 }
2978 }
2979 } else if (m_inList) {
2980 m_inItem = true;
2981 m_isIncludeItem = (tagName == u"include"_sv);
2982 } else {
2983 if (tagName == u"list"_sv) {
2984 m_keywords.clear();
2985 m_inList = true;
2986 }
2987 m_data += u'<' % tagName;
2988 const auto attrs = xml.attributes();
2989 for (const auto &attr : attrs) {
2990 auto name = attr.name();
2991 auto value = (name == u"kateversion") ? QStringView(m_kateVersion) : attr.value();
2992 writeXmlAttribute(out&: m_data, attrName: name, value, tagName);
2993 }
2994 }
2995 break;
2996 }
2997
2998 case QXmlStreamReader::EndElement: {
2999 const auto tagName = xml.name();
3000 if (m_inItem) {
3001 m_inItem = false;
3002 m_hasElems.pop_back();
3003 break;
3004 } else if (m_inList) {
3005 m_inList = false;
3006 std::sort(first: m_keywords.begin(), last: m_keywords.end());
3007 m_keywords.erase(first: std::unique(first: m_keywords.begin(), last: m_keywords.end()), last: m_keywords.end());
3008 for (const auto &item : m_keywords) {
3009 m_data += item.isIncludeTag ? u"<include>"_sv : u"<item>"_sv;
3010 writeXmlText(out&: m_data, text: item.text);
3011 m_data += item.isIncludeTag ? u"</include>"_sv : u"</item>"_sv;
3012 }
3013 } else if (m_inContexts && !m_contexts.empty() && tagName == u"contexts"_sv) {
3014 m_inContexts = false;
3015 // sorting contexts by the most used (ignore first context)
3016 std::sort(first: m_contexts.begin() + 1, last: m_contexts.end(), comp: [&](auto &ctx1, auto &ctx2) {
3017 auto i1 = m_contextRefs.value(ctx1.name);
3018 auto i2 = m_contextRefs.value(ctx2.name);
3019 if (i1 != i2) {
3020 return i1 > i2;
3021 }
3022 // for a reproducible build, contexts with the same number of uses are sorted by name
3023 return ctx1.name < ctx2.name;
3024 });
3025 for (const auto &ctx : m_contexts) {
3026 m_data += ctx.data;
3027 }
3028 }
3029
3030 QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data;
3031 if (m_hasElems.back()) {
3032 out += u"</"_sv % tagName % u'>';
3033 } else {
3034 out += u"/>"_sv;
3035 }
3036 m_hasElems.pop_back();
3037 break;
3038 }
3039
3040 case QXmlStreamReader::EntityReference:
3041 case QXmlStreamReader::Characters:
3042 if (m_inItem) {
3043 m_keywords.push_back(x: {.text: xml.text().toString(), .isIncludeTag: m_isIncludeItem});
3044 }
3045 break;
3046
3047 default:;
3048 }
3049 }
3050
3051private:
3052 void closePreviousOpenTag(QString &out)
3053 {
3054 if (!m_hasElems.back()) {
3055 m_hasElems.back() = true;
3056 out += u'>';
3057 }
3058 }
3059
3060 /**
3061 * Write \p text escaping special characters.
3062 */
3063 static void writeXmlText(QString &out, QStringView text, bool escapeDQ = false)
3064 {
3065 for (const QChar &c : text) {
3066 if (c == u'<') {
3067 out += u"&lt;"_sv;
3068 } else if (c == u'&') {
3069 out += u"&amp;"_sv;
3070 } else if (escapeDQ && c == u'"') {
3071 out += u"&#34;"_sv;
3072 } else if (c == u'\t') {
3073 // non-space whitespace character in an attribute is remplaced with space...
3074 out += u"&#9;"_sv;
3075 } else {
3076 out += c;
3077 }
3078 }
3079 }
3080
3081 /**
3082 * Write attribut in \p out.
3083 * Booleans are converted to 0, 1 or ignored if this corresponds to the default value.
3084 * Values will be written with either double quotes or single quotes,
3085 * depending on which takes up the least space
3086 */
3087 static void writeXmlAttribute(QString &out, QStringView attrName, QStringView value, QStringView tagName)
3088 {
3089 enum class DefaultBool {
3090 // default value is false
3091 False,
3092 // default value is true
3093 True,
3094 // manipulate as a tribool whose attribute absence is equivalent to None
3095 None,
3096 // not used
3097 Ignored,
3098 // default value is false, but None for <keyword>
3099 FalseOrKeywordTag,
3100 // default value is true, but depends on another value for <keywords>
3101 TrueOrKeywordsTag,
3102 // default is false, but ignored in <context>
3103 DynamicAttr,
3104 };
3105 static const QHash<QStringView, DefaultBool> booleanAttrs({
3106 {u"fallthrough"_sv, DefaultBool::Ignored},
3107 {u"dynamic"_sv, DefaultBool::DynamicAttr},
3108 {u"hidden"_sv, DefaultBool::False},
3109 {u"indentationsensitive"_sv, DefaultBool::False},
3110 {u"noIndentationBasedFolding"_sv, DefaultBool::False},
3111 {u"lookAhead"_sv, DefaultBool::False},
3112 {u"firstNonSpace"_sv, DefaultBool::False},
3113 {u"insensitive"_sv, DefaultBool::FalseOrKeywordTag},
3114 {u"minimal"_sv, DefaultBool::False},
3115 {u"includeAttrib"_sv, DefaultBool::False},
3116 {u"italic"_sv, DefaultBool::None},
3117 {u"bold"_sv, DefaultBool::None},
3118 {u"underline"_sv, DefaultBool::None},
3119 {u"strikeOut"_sv, DefaultBool::None},
3120 {u"spellChecking"_sv, DefaultBool::True},
3121 {u"casesensitive"_sv, DefaultBool::TrueOrKeywordsTag},
3122 {u"ignored"_sv, DefaultBool::Ignored},
3123 });
3124
3125 auto it = booleanAttrs.find(key: attrName);
3126 // convert boolean value
3127 if (it != booleanAttrs.end()) {
3128 bool b = KSyntaxHighlighting::Xml::attrToBool(str: value);
3129 bool ignoreAttr = false;
3130 switch (*it) {
3131 case DefaultBool::Ignored:
3132 ignoreAttr = true;
3133 break;
3134 case DefaultBool::TrueOrKeywordsTag:
3135 ignoreAttr = (tagName == u"keywords"_sv) ? false : b;
3136 break;
3137 case DefaultBool::True:
3138 ignoreAttr = b;
3139 break;
3140 case DefaultBool::FalseOrKeywordTag:
3141 ignoreAttr = (tagName == u"keyword"_sv) ? false : !b;
3142 break;
3143 case DefaultBool::DynamicAttr:
3144 ignoreAttr = (tagName == u"context"_sv) || !b;
3145 break;
3146 case DefaultBool::False:
3147 ignoreAttr = !b;
3148 break;
3149 case DefaultBool::None:
3150 ignoreAttr = false;
3151 break;
3152 }
3153 if (!ignoreAttr) {
3154 out += u' ' % attrName % u"=\""_sv % (b ? u'1' : u'0') % u'"';
3155 }
3156 } else {
3157 const bool hasDQ = value.contains(c: u'"');
3158 // attribute in double quotes when the value does not contain " or contains " and '
3159 if (!hasDQ || value.contains(c: u'\'')) {
3160 out += u' ' % attrName % u"=\""_sv;
3161 writeXmlText(out, text: value, escapeDQ: hasDQ);
3162 out += u'"';
3163 // attribute in single quotes because the value contains "
3164 } else {
3165 out += u' ' % attrName % u"='"_sv;
3166 writeXmlText(out, text: value);
3167 out += u'\'';
3168 }
3169 }
3170 }
3171
3172 /**
3173 * Sort attributes for better compression by rcc.
3174 */
3175 static void sortAttributes(QXmlStreamAttributes &attrs)
3176 {
3177 static const QHash<QStringView, int> priorityAttrs({
3178 // context and rule
3179 {u"attribute"_sv, 5},
3180
3181 // context and itemData
3182 {u"name"_sv, 4},
3183
3184 // context
3185 {u"noIndentationBasedFolding"_sv, 11},
3186 {u"lineEndContext"_sv, 9},
3187 {u"lineEmptyContext"_sv, 8},
3188 {u"fallthroughContext"_sv, 7},
3189
3190 // rule
3191 {u"lookAhead"_sv, 100},
3192 {u"firstNonSpace"_sv, 99},
3193 {u"dynamic"_sv, 98},
3194 {u"minimal"_sv, 97},
3195 {u"includeAttrib"_sv, 96},
3196 {u"insensitive"_sv, 95},
3197 {u"column"_sv, 50},
3198 {u"beginRegion"_sv, 40},
3199 {u"endRegion"_sv, 41},
3200 {u"weakDeliminator"_sv, 31},
3201 {u"additionalDeliminator"_sv, 30},
3202 {u"context"_sv, 20},
3203 {u"String"_sv, 2},
3204 {u"char"_sv, 2},
3205
3206 // itemData
3207 {u"strikeOut"_sv, 100},
3208 {u"underline"_sv, 99},
3209 {u"italic"_sv, 98},
3210 {u"bold"_sv, 97},
3211 {u"spellChecking"_sv, 96},
3212 {u"defStyleNum"_sv, 95},
3213 {u"color"_sv, 94},
3214 {u"backgroundColor"_sv, 93},
3215 {u"selBackgroundColor"_sv, 92},
3216 {u"selColor"_sv, 91},
3217 });
3218 std::sort(first: attrs.begin(), last: attrs.end(), comp: [](auto &attr1, auto &attr2) {
3219 auto i1 = priorityAttrs.value(attr1.name());
3220 auto i2 = priorityAttrs.value(attr2.name());
3221 if (i1 != i2) {
3222 return i1 < i2;
3223 }
3224 return attr1.name() < attr2.name();
3225 });
3226 }
3227
3228 struct Context {
3229 QString name;
3230 QString data;
3231 };
3232 struct Item {
3233 QString text;
3234 bool isIncludeTag;
3235
3236 std::strong_ordering operator<=>(const Item &other) const = default;
3237 };
3238 QString m_data = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s;
3239 std::vector<Context> m_contexts;
3240 QHash<QString, int> m_contextRefs;
3241 std::vector<Item> m_keywords;
3242 QVarLengthArray<bool, 8> m_hasElems;
3243 QString m_kateVersion;
3244 bool m_inContexts = false;
3245 bool m_inList = false;
3246 bool m_inItem = false;
3247 bool m_isIncludeItem = false;
3248};
3249
3250void printFileError(const QFile &file)
3251{
3252 qWarning() << "Failed to open" << file.fileName() << "-" << file.errorString();
3253}
3254
3255void printXmlError(const QString &fileName, const QXmlStreamReader &xml)
3256{
3257 qWarning() << fileName << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
3258};
3259
3260QStringList readListing(const QString &fileName)
3261{
3262 QFile file(fileName);
3263 if (!file.open(flags: QIODevice::ReadOnly)) {
3264 printFileError(file);
3265 return QStringList();
3266 }
3267
3268 QXmlStreamReader xml(&file);
3269 QStringList listing;
3270 while (!xml.atEnd()) {
3271 xml.readNext();
3272
3273 // add only .xml files, no .json or stuff
3274 if (xml.isCharacters() && xml.text().contains(s: QLatin1String(".xml"))) {
3275 listing.append(t: xml.text().toString());
3276 }
3277 }
3278
3279 if (xml.hasError()) {
3280 printXmlError(fileName, xml);
3281 listing.clear();
3282 }
3283
3284 return listing;
3285}
3286
3287/**
3288 * check if the "extensions" attribute have valid wildcards
3289 * @param extensions extensions string to check
3290 * @return valid?
3291 */
3292bool checkExtensions(QStringView extensions)
3293{
3294 // get list of extensions
3295 const QList<QStringView> extensionParts = extensions.split(sep: u';', behavior: Qt::SkipEmptyParts);
3296
3297 // ok if empty
3298 if (extensionParts.isEmpty()) {
3299 return true;
3300 }
3301
3302 // check that only valid wildcard things are inside the parts
3303 for (const auto &extension : extensionParts) {
3304 for (const auto c : extension) {
3305 // eat normal things
3306 if (c.isDigit() || c.isLetter()) {
3307 continue;
3308 }
3309
3310 // allow some special characters
3311 if (c == u'.' || c == u'-' || c == u'_' || c == u'+') {
3312 continue;
3313 }
3314
3315 // only allowed wildcard things: '?' and '*'
3316 if (c == u'?' || c == u'*') {
3317 continue;
3318 }
3319
3320 qWarning() << "invalid character" << c << "seen in extensions wildcard";
3321 return false;
3322 }
3323 }
3324
3325 // all checks passed
3326 return true;
3327}
3328
3329struct CompressedFile {
3330 QString fileName;
3331 QString xmlData;
3332 bool generated;
3333};
3334
3335}
3336
3337int main(int argc, char *argv[])
3338{
3339 // get app instance
3340 QCoreApplication app(argc, argv);
3341
3342 // ensure enough arguments are passed
3343 if (app.arguments().size() < 4) {
3344 return 1;
3345 }
3346
3347#ifdef HAS_XERCESC
3348 // care for proper init and cleanup
3349 XMLPlatformUtils::Initialize();
3350 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
3351
3352 /*
3353 * parse XSD first time and cache it
3354 */
3355 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
3356
3357 // create parser for the XSD
3358 CustomXMLValidator parser(&xsd);
3359
3360 // load grammar into the pool, on error just abort
3361 const auto xsdFile = app.arguments().at(2);
3362 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || parser.eh.failed()) {
3363 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(parser.messages));
3364 return 2;
3365 }
3366
3367 // lock the pool, no later modifications wanted!
3368 xsd.lockPool();
3369#endif
3370
3371 const QString hlFilenamesListing = app.arguments().value(i: 3);
3372 if (hlFilenamesListing.isEmpty()) {
3373 return 1;
3374 }
3375
3376 QStringList hlFilenames = readListing(fileName: hlFilenamesListing);
3377 if (hlFilenames.isEmpty()) {
3378 qWarning(msg: "Failed to read %s", qPrintable(hlFilenamesListing));
3379 return 3;
3380 }
3381
3382 // text attributes
3383 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("alternativeNames") << QStringLiteral("section")
3384 << QStringLiteral("mimetype") << QStringLiteral("extensions") << QStringLiteral("style")
3385 << QStringLiteral("author") << QStringLiteral("license") << QStringLiteral("indenter");
3386
3387 // index all given highlightings
3388 HlFilesChecker filesChecker;
3389 QVariantMap hls;
3390 int anyError = 0;
3391 std::vector<CompressedFile> compressedFiles;
3392 for (const QString &hlFilename : std::as_const(t&: hlFilenames)) {
3393 QFile hlFile(hlFilename);
3394 if (!hlFile.open(flags: QIODevice::ReadOnly)) {
3395 printFileError(file: hlFile);
3396 anyError = 3;
3397 continue;
3398 }
3399
3400#ifdef HAS_XERCESC
3401 // create parser
3402 CustomXMLValidator parser(&xsd);
3403
3404 // parse the XML file
3405 parser.parse((const char16_t *)hlFile.fileName().utf16());
3406
3407 // report issues
3408 if (parser.eh.failed()) {
3409 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(parser.messages));
3410 anyError = 4;
3411 continue;
3412 }
3413#endif
3414
3415 // read the needed attributes from toplevel language tag
3416 hlFile.reset();
3417 QXmlStreamReader xml(&hlFile);
3418 if (xml.readNextStartElement()) {
3419 if (xml.name() != QLatin1String("language")) {
3420 anyError = 5;
3421 continue;
3422 }
3423 } else {
3424 anyError = 6;
3425 continue;
3426 }
3427
3428 // map to store hl info
3429 QVariantMap hl;
3430
3431 // transfer text attributes
3432 for (const QString &attribute : std::as_const(t: textAttributes)) {
3433 hl[attribute] = xml.attributes().value(qualifiedName: attribute).toString();
3434 }
3435
3436 // check if extensions have the right format
3437 if (!checkExtensions(extensions: hl[QStringLiteral("extensions")].toString())) {
3438 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
3439 anyError = 23;
3440 }
3441
3442 // numerical attributes
3443 hl[QStringLiteral("version")] = xml.attributes().value(qualifiedName: QLatin1String("version")).toInt();
3444 hl[QStringLiteral("priority")] = xml.attributes().value(qualifiedName: QLatin1String("priority")).toInt();
3445
3446 // boolean attributes
3447 hl[QStringLiteral("hidden")] = attrToBool(str: xml.attributes().value(qualifiedName: QLatin1String("hidden")));
3448 hl[QStringLiteral("generated")] = attrToBool(str: xml.attributes().value(qualifiedName: QLatin1String("generated")));
3449
3450 // keep some strings as UTF-8 for faster translations
3451 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
3452 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
3453
3454 // remember hl
3455 hls[QFileInfo(hlFile).fileName()] = hl;
3456
3457 const QStringView kateversion = xml.attributes().value(QStringLiteral("kateversion"));
3458 const QString hlName = hl[QStringLiteral("name")].toString();
3459 const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames")].toString();
3460
3461 filesChecker.setDefinition(verStr: kateversion,
3462 filename: hlFilename,
3463 name: hlName,
3464 alternativeNames: hlAlternativeNames.split(sep: u';', behavior: Qt::SkipEmptyParts),
3465 generated: hl[QStringLiteral("generated")].toBool());
3466
3467 // As the compressor removes "fallthrough" attribute which is required with
3468 // "fallthroughContext" before the 5.62 version, the minimum version is
3469 // automatically increased
3470 HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u"5.62"_s : kateversion.toString());
3471 compressor.processElement(xml);
3472
3473 // scan for broken regex or keywords with spaces
3474 while (!xml.atEnd()) {
3475 xml.readNext();
3476 filesChecker.processElement(xml);
3477 compressor.processElement(xml);
3478 }
3479
3480 if (xml.hasError()) {
3481 anyError = 33;
3482 printXmlError(fileName: hlFilename, xml);
3483 }
3484
3485 compressedFiles.emplace_back(args: CompressedFile{.fileName: QFileInfo(hlFilename).fileName(), .xmlData: compressor.compressedXML(), .generated: hl[QStringLiteral("generated")].toBool()});
3486 }
3487
3488 filesChecker.resolveContexts();
3489
3490 if (!filesChecker.check()) {
3491 anyError = 7;
3492 }
3493
3494 // bail out if any problem was seen
3495 if (anyError) {
3496 return anyError;
3497 }
3498
3499 // check compressed file
3500 HlFilesChecker filesChecker2;
3501 const QString compressedDir = app.arguments().at(i: 4) + u"/"_sv;
3502 for (const auto &compressedFile : std::as_const(t&: compressedFiles)) {
3503 const auto outFileName = compressedDir + compressedFile.fileName;
3504 auto utf8Data = compressedFile.xmlData.toUtf8();
3505
3506#ifdef HAS_XERCESC
3507 // create parser
3508 CustomXMLValidator parser(&xsd);
3509
3510 auto utf8Filename = outFileName.toUtf8();
3511 utf8Filename.append('\0');
3512 // parse the XML file
3513 MemBufInputSource membuf(reinterpret_cast<const XMLByte *>(utf8Data.constData()), utf8Data.size(), utf8Filename.data());
3514
3515 // report issues
3516 if (parser.eh.failed()) {
3517 qWarning("Failed to validate XML %s: %s", qPrintable(outFileName), qPrintable(parser.messages));
3518 return 8;
3519 }
3520#endif
3521
3522 QBuffer buffer(&utf8Data);
3523 buffer.open(openMode: QBuffer::ReadOnly);
3524 QXmlStreamReader xml(&buffer);
3525 // scan for broken file
3526 while (!xml.atEnd()) {
3527 if (xml.readNext() == QXmlStreamReader::TokenType::StartElement && xml.name() == u"language"_sv) {
3528 const auto attrs = xml.attributes();
3529 const auto version = attrs.value(qualifiedName: u"kateversion"_sv);
3530 const QString hlName = attrs.value(qualifiedName: u"name"_sv).toString();
3531 const QString hlAlternativeNames = attrs.value(qualifiedName: u"alternativeNames"_sv).toString();
3532 filesChecker2.setDefinition(verStr: version, filename: outFileName, name: hlName, alternativeNames: hlAlternativeNames.split(sep: u';', behavior: Qt::SkipEmptyParts), generated: compressedFile.generated);
3533 }
3534 filesChecker2.processElement(xml);
3535 }
3536
3537 if (xml.hasError()) {
3538 printXmlError(fileName: outFileName, xml);
3539 return 9;
3540 }
3541
3542 // create outfile, after all has worked!
3543 QFile outFile(outFileName);
3544 if (!outFile.open(flags: QIODevice::WriteOnly | QIODevice::Truncate)) {
3545 return 10;
3546 }
3547 outFile.write(data: utf8Data);
3548 }
3549
3550 filesChecker2.resolveContexts();
3551
3552 // bail out if any problem was seen
3553 if (!filesChecker2.check()) {
3554 return 11;
3555 }
3556
3557 // create outfile, after all has worked!
3558 QFile outFile(app.arguments().at(i: 1));
3559 if (!outFile.open(flags: QIODevice::WriteOnly | QIODevice::Truncate)) {
3560 return 12;
3561 }
3562
3563 // write out json
3564 outFile.write(data: QCborValue::fromVariant(variant: QVariant(hls)).toCbor());
3565
3566 // be done
3567 return 0;
3568}
3569

// source code of syntax-highlighting/src/indexer/katehighlightingindexer.cpp