1 | /* |
2 | SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org> |
3 | SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com> |
4 | |
5 | SPDX-License-Identifier: MIT |
6 | */ |
7 | |
8 | #include <QBuffer> |
9 | #include <QCborValue> |
10 | #include <QCoreApplication> |
11 | #include <QDebug> |
12 | #include <QFile> |
13 | #include <QFileInfo> |
14 | #include <QMutableMapIterator> |
15 | #include <QRegularExpression> |
16 | #include <QScopeGuard> |
17 | #include <QString> |
18 | #include <QVariant> |
19 | #include <QXmlStreamReader> |
20 | |
21 | #ifdef HAS_XERCESC |
22 | |
23 | #include <xercesc/framework/MemBufInputSource.hpp> |
24 | #include <xercesc/framework/XMLGrammarPoolImpl.hpp> |
25 | |
26 | #include <xercesc/parsers/SAX2XMLReaderImpl.hpp> |
27 | |
28 | #include <xercesc/sax/ErrorHandler.hpp> |
29 | #include <xercesc/sax/SAXParseException.hpp> |
30 | |
31 | #include <xercesc/util/PlatformUtils.hpp> |
32 | #include <xercesc/util/XMLString.hpp> |
33 | #include <xercesc/util/XMLUni.hpp> |
34 | |
35 | #include <xercesc/framework/XMLGrammarPoolImpl.hpp> |
36 | #include <xercesc/validators/common/Grammar.hpp> |
37 | |
38 | using namespace xercesc; |
39 | |
40 | /* |
41 | * Ideas taken from: |
42 | * |
43 | * author : Boris Kolpackov <boris@codesynthesis.com> |
44 | * copyright : not copyrighted - public domain |
45 | * |
46 | * This program uses Xerces-C++ SAX2 parser to load a set of schema files |
47 | * and then to validate a set of XML documents against these schemas. To |
48 | * build this program you will need Xerces-C++ 3.0.0 or later. For more |
49 | * information, see: |
50 | * |
51 | * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/ |
52 | */ |
53 | |
54 | /** |
55 | * Error handler object used during xml schema validation. |
56 | */ |
57 | class CustomErrorHandler : public ErrorHandler |
58 | { |
59 | public: |
60 | /** |
61 | * Constructor |
62 | * @param messages Pointer to the error message string to fill. |
63 | */ |
64 | CustomErrorHandler(QString *messages) |
65 | : m_messages(messages) |
66 | { |
67 | } |
68 | |
69 | /** |
70 | * Check global success/fail state. |
71 | * @return True if there was a failure, false otherwise. |
72 | */ |
73 | bool failed() const |
74 | { |
75 | return m_failed; |
76 | } |
77 | |
78 | private: |
79 | /** |
80 | * Severity classes for error messages. |
81 | */ |
82 | enum severity { s_warning, s_error, s_fatal }; |
83 | |
84 | /** |
85 | * Wrapper for warning exceptions. |
86 | * @param e Exception to handle. |
87 | */ |
88 | void warning(const SAXParseException &e) override |
89 | { |
90 | m_failed = true; // be strict, warnings are evil, too! |
91 | handle(e, s_warning); |
92 | } |
93 | |
94 | /** |
95 | * Wrapper for error exceptions. |
96 | * @param e Exception to handle. |
97 | */ |
98 | void error(const SAXParseException &e) override |
99 | { |
100 | m_failed = true; |
101 | handle(e, s_error); |
102 | } |
103 | |
104 | /** |
105 | * Wrapper for fatal error exceptions. |
106 | * @param e Exception to handle. |
107 | */ |
108 | void fatalError(const SAXParseException &e) override |
109 | { |
110 | m_failed = true; |
111 | handle(e, s_fatal); |
112 | } |
113 | |
114 | /** |
115 | * Reset the error status to "no error". |
116 | */ |
117 | void resetErrors() override |
118 | { |
119 | m_failed = false; |
120 | } |
121 | |
122 | /** |
123 | * Generic handler for error/warning/fatal error message exceptions. |
124 | * @param e Exception to handle. |
125 | * @param s Enum value encoding the message severtity. |
126 | */ |
127 | void handle(const SAXParseException &e, severity s) |
128 | { |
129 | // get id to print |
130 | const XMLCh *xid(e.getPublicId()); |
131 | if (!xid) |
132 | xid = e.getSystemId(); |
133 | |
134 | m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: " ) |
135 | << QString::fromUtf16(e.getMessage()) << Qt::endl; |
136 | } |
137 | |
138 | private: |
139 | /** |
140 | * Storage for created error messages in this handler. |
141 | */ |
142 | QTextStream m_messages; |
143 | |
144 | /** |
145 | * Global error state. True if there was an error, false otherwise. |
146 | */ |
147 | bool m_failed = false; |
148 | }; |
149 | |
150 | class CustomXMLValidator : public SAX2XMLReaderImpl |
151 | { |
152 | public: |
153 | QString messages; |
154 | CustomErrorHandler eh{&messages}; |
155 | |
156 | CustomXMLValidator(XMLGrammarPool *xsd) |
157 | : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd) |
158 | { |
159 | // Commonly useful configuration. |
160 | // |
161 | setFeature(XMLUni::fgSAX2CoreNameSpaces, true); |
162 | setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true); |
163 | setFeature(XMLUni::fgSAX2CoreValidation, true); |
164 | |
165 | // Enable validation. |
166 | // |
167 | setFeature(XMLUni::fgXercesSchema, true); |
168 | setFeature(XMLUni::fgXercesSchemaFullChecking, true); |
169 | setFeature(XMLUni::fgXercesValidationErrorAsFatal, true); |
170 | |
171 | // Use the loaded grammar during parsing. |
172 | // |
173 | setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true); |
174 | |
175 | // Don't load schemas from any other source (e.g., from XML document's |
176 | // xsi:schemaLocation attributes). |
177 | // |
178 | setFeature(XMLUni::fgXercesLoadSchema, false); |
179 | |
180 | // Xerces-C++ 3.1.0 is the first version with working multi import |
181 | // support. |
182 | // |
183 | setFeature(XMLUni::fgXercesHandleMultipleImports, true); |
184 | |
185 | setErrorHandler(&eh); |
186 | } |
187 | }; |
188 | |
189 | #endif |
190 | |
191 | #include "../lib/worddelimiters_p.h" |
192 | #include "../lib/xml_p.h" |
193 | |
194 | #include <array> |
195 | |
196 | using KSyntaxHighlighting::WordDelimiters; |
197 | using KSyntaxHighlighting::Xml::attrToBool; |
198 | |
199 | using namespace Qt::Literals::StringLiterals; |
200 | |
201 | #if QT_VERSION < QT_VERSION_CHECK(6, 10, 0) |
202 | static constexpr QStringView operator""_sv (const char16_t *s, std::size_t n) |
203 | { |
204 | return QStringView(s, s + n); |
205 | } |
206 | #endif |
207 | |
208 | namespace |
209 | { |
210 | |
//! Two-component version number (major.minor) with a strict ordering.
struct KateVersion {
    int majorRevision;
    int minorRevision;

    //! Both components default to 0.
    KateVersion(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! Lexicographic comparison: the major revision decides first, the minor revision breaks ties.
    bool operator<(const KateVersion &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
226 | |
227 | class HlFilesChecker |
228 | { |
229 | public: |
230 | void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames) |
231 | { |
232 | m_currentDefinition = &*m_definitions.insert(key: name, value: Definition{}); |
233 | m_currentDefinition->languageName = name; |
234 | m_currentDefinition->filename = filename; |
235 | m_currentDefinition->kateVersionStr = verStr.toString(); |
236 | m_currentKeywords = nullptr; |
237 | m_currentContext = nullptr; |
238 | |
239 | const auto idx = verStr.indexOf(c: u'.'); |
240 | if (idx <= 0) { |
241 | qWarning() << filename << "invalid kateversion" << verStr; |
242 | m_success = false; |
243 | } else { |
244 | m_currentDefinition->kateVersion = {verStr.sliced(pos: 0, n: idx).toInt(), verStr.sliced(pos: idx + 1).toInt()}; |
245 | } |
246 | |
247 | auto checkName = [this, &filename](char const *nameType, const QString &name) { |
248 | auto it = m_names.find(key: name); |
249 | if (it != m_names.end()) { |
250 | qWarning() << filename << "duplicate" << nameType << "with" << it.value(); |
251 | m_success = false; |
252 | } else { |
253 | m_names.insert(key: name, value: filename); |
254 | } |
255 | }; |
256 | checkName("name" , name); |
257 | for (const auto &alternativeName : alternativeNames) { |
258 | checkName("alternative name" , alternativeName); |
259 | } |
260 | } |
261 | |
//! \return the kateversion stored on the current definition.
//! m_currentDefinition is dereferenced without a null check.
KateVersion currentVersion() const
{
    return m_currentDefinition->kateVersion;
}
266 | |
267 | void processElement(const QXmlStreamReader &xml) |
268 | { |
269 | switch (xml.tokenType()) { |
270 | case QXmlStreamReader::StartElement: |
271 | if (m_currentContext) { |
272 | m_currentContext->rules.push_back(t: Context::Rule{}); |
273 | auto &rule = m_currentContext->rules.back(); |
274 | m_success = rule.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
275 | m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True; |
276 | } else if (m_currentKeywords) { |
277 | m_inKeywordItem = true; |
278 | } else if (xml.name() == u"context"_sv ) { |
279 | processContextElement(xml); |
280 | } else if (xml.name() == u"list"_sv ) { |
281 | processListElement(xml); |
282 | } else if (xml.name() == u"keywords"_sv ) { |
283 | m_success = m_currentDefinition->parseKeywords(xml) && m_success; |
284 | } else if (xml.name() == u"emptyLine"_sv ) { |
285 | m_success = parseEmptyLine(filename: m_currentDefinition->filename, xml) && m_success; |
286 | } else if (xml.name() == u"itemData"_sv ) { |
287 | m_success = m_currentDefinition->itemDatas.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
288 | } |
289 | break; |
290 | |
291 | case QXmlStreamReader::EndElement: |
292 | if (m_currentContext && xml.name() == u"context"_sv ) { |
293 | m_currentContext = nullptr; |
294 | } else if (m_currentKeywords && xml.name() == u"list"_sv ) { |
295 | m_currentKeywords = nullptr; |
296 | } else if (m_currentKeywords) { |
297 | m_success = m_currentKeywords->items.parseElement(filename: m_currentDefinition->filename, xml, content: m_textContent) && m_success; |
298 | m_textContent.clear(); |
299 | m_inKeywordItem = false; |
300 | } |
301 | break; |
302 | |
303 | case QXmlStreamReader::EntityReference: |
304 | case QXmlStreamReader::Characters: |
305 | if (m_inKeywordItem) { |
306 | m_textContent += xml.text(); |
307 | } |
308 | break; |
309 | |
310 | default:; |
311 | } |
312 | } |
313 | |
314 | //! Resolve context attribute and include tag |
315 | void resolveContexts() |
316 | { |
317 | QMutableMapIterator<QString, Definition> def(m_definitions); |
318 | while (def.hasNext()) { |
319 | def.next(); |
320 | auto &definition = def.value(); |
321 | auto &contexts = definition.contexts; |
322 | |
323 | if (contexts.isEmpty()) { |
324 | qWarning() << definition.filename << "has no context" ; |
325 | m_success = false; |
326 | continue; |
327 | } |
328 | |
329 | auto markAsUsedContext = [](ContextName &contextName) { |
330 | if (!contextName.stay && contextName.context) { |
331 | contextName.context->isOnlyIncluded = false; |
332 | } |
333 | }; |
334 | |
335 | QMutableMapIterator<QString, Context> contextIt(contexts); |
336 | while (contextIt.hasNext()) { |
337 | contextIt.next(); |
338 | auto &context = contextIt.value(); |
339 | resolveContextName(definition, context, contextName&: context.lineEndContext, line: context.line); |
340 | resolveContextName(definition, context, contextName&: context.lineEmptyContext, line: context.line); |
341 | resolveContextName(definition, context, contextName&: context.fallthroughContext, line: context.line); |
342 | markAsUsedContext(context.lineEndContext); |
343 | markAsUsedContext(context.lineEmptyContext); |
344 | markAsUsedContext(context.fallthroughContext); |
345 | for (auto &rule : context.rules) { |
346 | rule.parentContext = &context; |
347 | resolveContextName(definition, context, contextName&: rule.context, line: rule.line); |
348 | if (rule.type != Context::Rule::Type::IncludeRules) { |
349 | markAsUsedContext(rule.context); |
350 | } else if (rule.includeAttrib == XmlBool::True && rule.context.context) { |
351 | rule.context.context->referencedWithIncludeAttrib = true; |
352 | } |
353 | } |
354 | } |
355 | |
356 | auto *firstContext = &*definition.contexts.find(key: definition.firstContextName); |
357 | firstContext->isOnlyIncluded = false; |
358 | definition.firstContext = firstContext; |
359 | } |
360 | |
361 | resolveIncludeRules(); |
362 | } |
363 | |
364 | bool check() const |
365 | { |
366 | bool success = m_success; |
367 | |
368 | const auto usedContexts = extractUsedContexts(); |
369 | |
370 | QMap<const Definition *, const Definition *> maxVersionByDefinitions; |
371 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules; |
372 | |
373 | QMapIterator<QString, Definition> def(m_definitions); |
374 | while (def.hasNext()) { |
375 | def.next(); |
376 | const auto &definition = def.value(); |
377 | const auto &filename = definition.filename; |
378 | |
379 | auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions); |
380 | if (maxDef != &definition) { |
381 | qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr |
382 | << ". Please, increase kateversion." ; |
383 | success = false; |
384 | } |
385 | |
386 | QSet<ItemDatas::Style> usedAttributeNames; |
387 | QSet<ItemDatas::Style> ignoredAttributeNames; |
388 | success = checkKeywordsList(definition) && success; |
389 | success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success; |
390 | |
391 | // search for non-existing itemDatas. |
392 | const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames; |
393 | for (const auto &styleName : invalidNames) { |
394 | qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name; |
395 | success = false; |
396 | } |
397 | |
398 | // search for existing itemDatas, but unusable. |
399 | const auto ignoredNames = ignoredAttributeNames - usedAttributeNames; |
400 | for (const auto &styleName : ignoredNames) { |
401 | qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name |
402 | << "is never used. All uses are with lookAhead=true or <IncludeRules/>" ; |
403 | success = false; |
404 | } |
405 | |
406 | // search for unused itemDatas. |
407 | auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames; |
408 | unusedNames -= ignoredNames; |
409 | for (const auto &styleName : std::as_const(t&: unusedNames)) { |
410 | qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name; |
411 | success = false; |
412 | } |
413 | } |
414 | |
415 | QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules); |
416 | while (unreachableIncludedRuleIt.hasNext()) { |
417 | unreachableIncludedRuleIt.next(); |
418 | IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value(); |
419 | if (unreachableRulesBy.alwaysUnreachable) { |
420 | auto *rule = unreachableIncludedRuleIt.key(); |
421 | |
422 | if (!rule->parentContext->isOnlyIncluded) { |
423 | continue; |
424 | } |
425 | |
426 | // remove duplicates rules |
427 | QSet<const Context::Rule *> rules; |
428 | auto &unreachableBy = unreachableRulesBy.unreachableBy; |
429 | unreachableBy.erase(abegin: std::remove_if(first: unreachableBy.begin(), |
430 | last: unreachableBy.end(), |
431 | pred: [&](const RuleAndInclude &ruleAndInclude) { |
432 | if (rules.contains(value: ruleAndInclude.rule)) { |
433 | return true; |
434 | } |
435 | rules.insert(value: ruleAndInclude.rule); |
436 | return false; |
437 | }), |
438 | aend: unreachableBy.end()); |
439 | |
440 | QString message; |
441 | message.reserve(asize: 128); |
442 | for (auto &ruleAndInclude : std::as_const(t&: unreachableBy)) { |
443 | message += u"line "_sv ; |
444 | message += QString::number(ruleAndInclude.rule->line); |
445 | message += u" ["_sv ; |
446 | message += ruleAndInclude.rule->parentContext->name; |
447 | if (rule->filename != ruleAndInclude.rule->filename) { |
448 | message += u" ("_sv ; |
449 | message += ruleAndInclude.rule->filename; |
450 | message += u')'; |
451 | } |
452 | if (ruleAndInclude.includeRules) { |
453 | message += u" via line "_sv ; |
454 | message += QString::number(ruleAndInclude.includeRules->line); |
455 | } |
456 | message += u"], "_sv ; |
457 | } |
458 | message.chop(n: 2); |
459 | |
460 | qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message; |
461 | success = false; |
462 | } |
463 | } |
464 | |
465 | return success; |
466 | } |
467 | |
468 | private: |
//! Tri-state value of a boolean XML attribute.
//! \c Unspecified is the first enumerator, hence the value of an \c XmlBool{} member.
enum class XmlBool {
    Unspecified,
    False,
    True,
};
474 | |
475 | struct Context; |
476 | |
//! A context reference as written in an XML attribute, plus its resolution.
struct ContextName {
    // attribute text as read from the XML
    QString name;
    // NOTE(review): popCount and stay are presumably filled when the name is
    // resolved (not visible here); a reference with stay set is not counted
    // as a use of the target context
    int popCount = 0;
    bool stay = false;

    // resolved target context, nullptr until resolved
    Context *context = nullptr;
};
484 | |
485 | struct Parser { |
486 | const QString &filename; |
487 | const QXmlStreamReader &xml; |
488 | const QXmlStreamAttribute &attr; |
489 | bool success; |
490 | |
491 | //! Read a string type attribute, \c success = \c false when \p str is not empty |
492 | //! \return \c true when attr.name() == attrName, otherwise false |
493 | bool (QString &str, QStringView attrName) |
494 | { |
495 | if (attr.name() != attrName) { |
496 | return false; |
497 | } |
498 | |
499 | str = attr.value().toString(); |
500 | if (str.isEmpty()) { |
501 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty" ; |
502 | success = false; |
503 | } |
504 | |
505 | return true; |
506 | } |
507 | |
508 | //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified. |
509 | //! \return \c true when attr.name() == attrName, otherwise false |
510 | bool (XmlBool &xmlBool, QStringView attrName) |
511 | { |
512 | if (attr.name() != attrName) { |
513 | return false; |
514 | } |
515 | |
516 | xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(str: attr.value()) ? XmlBool::True : XmlBool::False; |
517 | |
518 | return true; |
519 | } |
520 | |
521 | //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0 |
522 | //! \return \c true when attr.name() == attrName, otherwise false |
523 | bool (int &positive, QStringView attrName) |
524 | { |
525 | if (attr.name() != attrName) { |
526 | return false; |
527 | } |
528 | |
529 | bool ok = true; |
530 | positive = attr.value().toInt(ok: &ok); |
531 | |
532 | if (!ok || positive < 0) { |
533 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value(); |
534 | success = false; |
535 | } |
536 | |
537 | return true; |
538 | } |
539 | |
540 | //! Read a color, \c success = \c false when \p color is already greater than or equal to 0 |
541 | //! \return \c true when attr.name() == attrName, otherwise false |
542 | bool checkColor(QStringView attrName) |
543 | { |
544 | if (attr.name() != attrName) { |
545 | return false; |
546 | } |
547 | |
548 | const auto value = attr.value(); |
549 | if (value.isEmpty() /*|| QColor(value).isValid()*/) { |
550 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value; |
551 | success = false; |
552 | } |
553 | |
554 | return true; |
555 | } |
556 | |
557 | //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char |
558 | //! \return \c true when attr.name() == attrName, otherwise false |
559 | bool (QChar &c, QStringView attrName) |
560 | { |
561 | if (attr.name() != attrName) { |
562 | return false; |
563 | } |
564 | |
565 | if (attr.value().size() == 1) { |
566 | c = attr.value()[0]; |
567 | } else { |
568 | c = u'_'; |
569 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value(); |
570 | success = false; |
571 | } |
572 | |
573 | return true; |
574 | } |
575 | |
576 | //! \return parsing status when \p isExtracted is \c true, otherwise \c false |
577 | bool (bool ) |
578 | { |
579 | if (isExtracted) { |
580 | return success; |
581 | } |
582 | |
583 | qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name(); |
584 | return false; |
585 | } |
586 | }; |
587 | |
588 | struct Keywords { |
589 | struct Items { |
590 | struct Item { |
591 | QString content; |
592 | int line; |
593 | |
594 | friend size_t qHash(const Item &item, size_t seed = 0) |
595 | { |
596 | return qHash(key: item.content, seed); |
597 | } |
598 | |
599 | friend bool operator==(const Item &item0, const Item &item1) |
600 | { |
601 | return item0.content == item1.content; |
602 | } |
603 | }; |
604 | |
605 | QList<Item> keywords; |
606 | QSet<Item> includes; |
607 | |
608 | bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content) |
609 | { |
610 | bool success = true; |
611 | |
612 | const int line = xml.lineNumber(); |
613 | |
614 | if (content.isEmpty()) { |
615 | qWarning() << filename << "line" << line << "is empty:" << xml.name(); |
616 | success = false; |
617 | } |
618 | |
619 | if (xml.name() == u"include"_sv ) { |
620 | includes.insert(value: {.content: content, .line: line}); |
621 | } else if (xml.name() == u"item"_sv ) { |
622 | keywords.append(t: {.content: content, .line: line}); |
623 | } else { |
624 | qWarning() << filename << "line" << line << "invalid element:" << xml.name(); |
625 | success = false; |
626 | } |
627 | |
628 | return success; |
629 | } |
630 | }; |
631 | |
632 | QString name; |
633 | Items items; |
634 | int line; |
635 | |
636 | bool parseElement(const QString &filename, const QXmlStreamReader &xml) |
637 | { |
638 | line = xml.lineNumber(); |
639 | |
640 | bool success = true; |
641 | const auto attrs = xml.attributes(); |
642 | for (const auto &attr : attrs) { |
643 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
644 | |
645 | const bool = parser.extractString(str&: name, attrName: u"name"_sv ); |
646 | |
647 | success = parser.checkIfExtracted(isExtracted); |
648 | } |
649 | return success; |
650 | } |
651 | }; |
652 | |
653 | struct Context { |
654 | struct Rule { |
//! Kind of a rule. Apart from \c Unknown, each enumerator corresponds to
//! the XML element of the same name. \c Unknown is the first enumerator,
//! hence the value of a \c Type{} member.
enum class Type {
    Unknown,
    AnyChar,
    Detect2Chars,
    DetectChar,
    DetectIdentifier,
    DetectSpaces,
    Float,
    HlCChar,
    HlCHex,
    HlCOct,
    HlCStringChar,
    IncludeRules,
    Int,
    LineContinue,
    RangeDetect,
    RegExpr,
    StringDetect,
    WordDetect,
    keyword,
};
676 | |
677 | Type type{}; |
678 | |
679 | bool isDotRegex = false; |
680 | int line = -1; |
681 | |
682 | // commonAttributes |
683 | QString attribute; |
684 | ContextName context; |
685 | QString beginRegion; |
686 | QString endRegion; |
687 | int column = -1; |
688 | XmlBool lookAhead{}; |
689 | XmlBool firstNonSpace{}; |
690 | |
691 | // StringDetect, WordDetect, keyword |
692 | XmlBool insensitive{}; |
693 | |
694 | // DetectChar, StringDetect, RegExpr, keyword |
695 | XmlBool dynamic{}; |
696 | |
697 | // Regex |
698 | XmlBool minimal{}; |
699 | |
700 | // IncludeRule |
701 | XmlBool includeAttrib{}; |
702 | |
703 | // DetectChar, Detect2Chars, LineContinue, RangeDetect |
704 | QChar char0; |
705 | // Detect2Chars, RangeDetect |
706 | QChar char1; |
707 | |
708 | // AnyChar, StringDetect, RegExpr, WordDetect, keyword |
709 | QString string; |
710 | // RegExpr without .* as suffix |
711 | QString sanitizedString; |
712 | |
713 | // Float, HlCHex, HlCOct, Int, WordDetect, keyword |
714 | QString additionalDeliminator; |
715 | QString weakDeliminator; |
716 | |
717 | // rules included by IncludeRules (without IncludeRule) |
718 | QList<const Rule *> includedRules; |
719 | |
720 | // IncludeRules included by IncludeRules |
721 | QSet<const Rule *> includedIncludeRules; |
722 | |
723 | Context const *parentContext = nullptr; |
724 | |
725 | QString filename; |
726 | |
727 | bool parseElement(const QString &filename, const QXmlStreamReader &xml) |
728 | { |
729 | this->filename = filename; |
730 | line = xml.lineNumber(); |
731 | |
732 | using Pair = QPair<QStringView, Type>; |
733 | static const auto pairs = { |
734 | Pair{u"AnyChar"_sv , Type::AnyChar}, |
735 | Pair{u"Detect2Chars"_sv , Type::Detect2Chars}, |
736 | Pair{u"DetectChar"_sv , Type::DetectChar}, |
737 | Pair{u"DetectIdentifier"_sv , Type::DetectIdentifier}, |
738 | Pair{u"DetectSpaces"_sv , Type::DetectSpaces}, |
739 | Pair{u"Float"_sv , Type::Float}, |
740 | Pair{u"HlCChar"_sv , Type::HlCChar}, |
741 | Pair{u"HlCHex"_sv , Type::HlCHex}, |
742 | Pair{u"HlCOct"_sv , Type::HlCOct}, |
743 | Pair{u"HlCStringChar"_sv , Type::HlCStringChar}, |
744 | Pair{u"IncludeRules"_sv , Type::IncludeRules}, |
745 | Pair{u"Int"_sv , Type::Int}, |
746 | Pair{u"LineContinue"_sv , Type::LineContinue}, |
747 | Pair{u"RangeDetect"_sv , Type::RangeDetect}, |
748 | Pair{u"RegExpr"_sv , Type::RegExpr}, |
749 | Pair{u"StringDetect"_sv , Type::StringDetect}, |
750 | Pair{u"WordDetect"_sv , Type::WordDetect}, |
751 | Pair{u"keyword" , Type::keyword}, |
752 | }; |
753 | |
754 | for (auto pair : pairs) { |
755 | if (xml.name() == pair.first) { |
756 | type = pair.second; |
757 | bool success = parseAttributes(filename, xml); |
758 | success = checkMandoryAttributes(filename, xml) && success; |
759 | if (success && type == Type::RegExpr) { |
760 | // ., (.) followed by *, +, {1} or nothing |
761 | static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)" )); |
762 | // remove "(?:" and ")" |
763 | static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))" )); |
764 | // remove parentheses on a copy of string |
765 | auto reg = QString(string).replace(re: removeParentheses, after: QString()); |
766 | isDotRegex = reg.contains(re: isDot); |
767 | |
768 | // Remove .* and .*$ suffix. |
769 | static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$" )); |
770 | sanitizedString = string; |
771 | sanitizedString.replace(re: allSuffix, after: QString()); |
772 | // string is a catch-all, do not sanitize |
773 | if (sanitizedString.isEmpty() || sanitizedString == u"^"_sv ) { |
774 | sanitizedString = string; |
775 | } |
776 | } |
777 | return success; |
778 | } |
779 | } |
780 | |
781 | qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name(); |
782 | return false; |
783 | } |
784 | |
785 | private: |
786 | bool parseAttributes(const QString &filename, const QXmlStreamReader &xml) |
787 | { |
788 | bool success = true; |
789 | |
790 | const auto attrs = xml.attributes(); |
791 | for (const auto &attr : attrs) { |
792 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
793 | |
794 | // clang-format off |
795 | const bool |
796 | = parser.extractString(str&: attribute, attrName: u"attribute"_sv ) |
797 | || parser.extractString(str&: context.name, attrName: u"context"_sv ) |
798 | || parser.extractXmlBool(xmlBool&: lookAhead, attrName: u"lookAhead"_sv ) |
799 | || parser.extractXmlBool(xmlBool&: firstNonSpace, attrName: u"firstNonSpace"_sv ) |
800 | || parser.extractString(str&: beginRegion, attrName: u"beginRegion"_sv ) |
801 | || parser.extractString(str&: endRegion, attrName: u"endRegion"_sv ) |
802 | || parser.extractPositive(positive&: column, attrName: u"column"_sv ) |
803 | || ((type == Type::RegExpr |
804 | || type == Type::StringDetect |
805 | || type == Type::WordDetect |
806 | || type == Type::keyword |
807 | ) && parser.extractXmlBool(xmlBool&: insensitive, attrName: u"insensitive"_sv )) |
808 | || ((type == Type::DetectChar |
809 | || type == Type::RegExpr |
810 | || type == Type::StringDetect |
811 | || type == Type::keyword |
812 | ) && parser.extractXmlBool(xmlBool&: dynamic, attrName: u"dynamic"_sv )) |
813 | || ((type == Type::RegExpr) |
814 | && parser.extractXmlBool(xmlBool&: minimal, attrName: u"minimal"_sv )) |
815 | || ((type == Type::DetectChar |
816 | || type == Type::Detect2Chars |
817 | || type == Type::LineContinue |
818 | || type == Type::RangeDetect |
819 | ) && parser.extractChar(c&: char0, attrName: u"char"_sv )) |
820 | || ((type == Type::Detect2Chars |
821 | || type == Type::RangeDetect |
822 | ) && parser.extractChar(c&: char1, attrName: u"char1"_sv )) |
823 | || ((type == Type::AnyChar |
824 | || type == Type::RegExpr |
825 | || type == Type::StringDetect |
826 | || type == Type::WordDetect |
827 | || type == Type::keyword |
828 | ) && parser.extractString(str&: string, attrName: u"String"_sv )) |
829 | || ((type == Type::IncludeRules) |
830 | && parser.extractXmlBool(xmlBool&: includeAttrib, attrName: u"includeAttrib"_sv )) |
831 | || ((type == Type::Float |
832 | || type == Type::HlCHex |
833 | || type == Type::HlCOct |
834 | || type == Type::Int |
835 | || type == Type::keyword |
836 | || type == Type::WordDetect |
837 | ) && (parser.extractString(str&: additionalDeliminator, attrName: u"additionalDeliminator"_sv ) |
838 | || parser.extractString(str&: weakDeliminator, attrName: u"weakDeliminator"_sv ))) |
839 | ; |
840 | // clang-format on |
841 | |
842 | success = parser.checkIfExtracted(isExtracted); |
843 | } |
844 | |
845 | if (type == Type::LineContinue && char0 == u'\0') { |
846 | char0 = u'\\'; |
847 | } |
848 | |
849 | return success; |
850 | } |
851 | |
852 | bool checkMandoryAttributes(const QString &filename, const QXmlStreamReader &xml) |
853 | { |
854 | QString missingAttr; |
855 | |
856 | switch (type) { |
857 | case Type::Unknown: |
858 | return false; |
859 | |
860 | case Type::AnyChar: |
861 | case Type::RegExpr: |
862 | case Type::StringDetect: |
863 | case Type::WordDetect: |
864 | case Type::keyword: |
865 | missingAttr = string.isEmpty() ? QStringLiteral("String" ) : QString(); |
866 | break; |
867 | |
868 | case Type::DetectChar: |
869 | missingAttr = !char0.unicode() ? QStringLiteral("char" ) : QString(); |
870 | break; |
871 | |
872 | case Type::Detect2Chars: |
873 | case Type::RangeDetect: |
874 | missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1" ) |
875 | : !char0.unicode() ? QStringLiteral("char" ) |
876 | : !char1.unicode() ? QStringLiteral("char1" ) |
877 | : QString(); |
878 | break; |
879 | |
880 | case Type::IncludeRules: |
881 | missingAttr = context.name.isEmpty() ? QStringLiteral("context" ) : QString(); |
882 | break; |
883 | |
884 | case Type::DetectIdentifier: |
885 | case Type::DetectSpaces: |
886 | case Type::Float: |
887 | case Type::HlCChar: |
888 | case Type::HlCHex: |
889 | case Type::HlCOct: |
890 | case Type::HlCStringChar: |
891 | case Type::Int: |
892 | case Type::LineContinue: |
893 | break; |
894 | } |
895 | |
896 | if (!missingAttr.isEmpty()) { |
897 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr; |
898 | return false; |
899 | } |
900 | |
901 | return true; |
902 | } |
903 | }; |
904 | |
905 | int line; |
906 | // becomes false when a context (except includeRule) refers to it |
907 | bool isOnlyIncluded = true; |
908 | // becomes true when an includedRule refers to it with includeAttrib=true |
909 | bool referencedWithIncludeAttrib = false; |
910 | bool hasDynamicRule = false; |
911 | QString name; |
912 | QString attribute; |
913 | ContextName lineEndContext; |
914 | ContextName lineEmptyContext; |
915 | ContextName fallthroughContext; |
916 | QList<Rule> rules; |
917 | XmlBool dynamic{}; |
918 | XmlBool fallthrough{}; |
919 | XmlBool stopEmptyLineContextSwitchLoop{}; |
920 | |
921 | bool parseElement(const QString &filename, const QXmlStreamReader &xml) |
922 | { |
923 | line = xml.lineNumber(); |
924 | |
925 | bool success = true; |
926 | |
927 | const auto attrs = xml.attributes(); |
928 | for (const auto &attr : attrs) { |
929 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
930 | XmlBool noIndentationBasedFolding{}; |
931 | |
932 | // clang-format off |
933 | const bool = parser.extractString(str&: name, attrName: u"name"_sv ) |
934 | || parser.extractString(str&: attribute, attrName: u"attribute"_sv ) |
935 | || parser.extractString(str&: lineEndContext.name, attrName: u"lineEndContext"_sv ) |
936 | || parser.extractString(str&: lineEmptyContext.name, attrName: u"lineEmptyContext"_sv ) |
937 | || parser.extractString(str&: fallthroughContext.name, attrName: u"fallthroughContext"_sv ) |
938 | || parser.extractXmlBool(xmlBool&: dynamic, attrName: u"dynamic"_sv ) |
939 | || parser.extractXmlBool(xmlBool&: fallthrough, attrName: u"fallthrough"_sv ) |
940 | || parser.extractXmlBool(xmlBool&: stopEmptyLineContextSwitchLoop, attrName: u"stopEmptyLineContextSwitchLoop"_sv ) |
941 | || parser.extractXmlBool(xmlBool&: noIndentationBasedFolding, attrName: u"noIndentationBasedFolding"_sv ); |
942 | // clang-format on |
943 | |
944 | success = parser.checkIfExtracted(isExtracted); |
945 | } |
946 | |
947 | if (name.isEmpty()) { |
948 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name" ; |
949 | success = false; |
950 | } |
951 | |
952 | if (attribute.isEmpty()) { |
953 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute" ; |
954 | success = false; |
955 | } |
956 | |
957 | return success; |
958 | } |
959 | }; |
960 | |
961 | struct ItemDatas { |
962 | struct Style { |
963 | QString name; |
964 | int line; |
965 | |
966 | friend size_t qHash(const Style &style, size_t seed = 0) |
967 | { |
968 | return qHash(key: style.name, seed); |
969 | } |
970 | |
971 | friend bool operator==(const Style &style0, const Style &style1) |
972 | { |
973 | return style0.name == style1.name; |
974 | } |
975 | }; |
976 | |
977 | QSet<Style> styleNames; |
978 | |
979 | bool parseElement(const QString &filename, const QXmlStreamReader &xml) |
980 | { |
981 | bool success = true; |
982 | |
983 | QString name; |
984 | QString defStyleNum; |
985 | XmlBool boolean; |
986 | |
987 | const auto attrs = xml.attributes(); |
988 | for (const auto &attr : attrs) { |
989 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
990 | |
991 | // clang-format off |
992 | const bool |
993 | = parser.extractString(str&: name, attrName: u"name"_sv ) |
994 | || parser.extractString(str&: defStyleNum, attrName: u"defStyleNum"_sv ) |
995 | || parser.extractXmlBool(xmlBool&: boolean, attrName: u"bold"_sv ) |
996 | || parser.extractXmlBool(xmlBool&: boolean, attrName: u"italic"_sv ) |
997 | || parser.extractXmlBool(xmlBool&: boolean, attrName: u"underline"_sv ) |
998 | || parser.extractXmlBool(xmlBool&: boolean, attrName: u"strikeOut"_sv ) |
999 | || parser.extractXmlBool(xmlBool&: boolean, attrName: u"spellChecking"_sv ) |
1000 | || parser.checkColor(attrName: u"color"_sv ) |
1001 | || parser.checkColor(attrName: u"selColor"_sv ) |
1002 | || parser.checkColor(attrName: u"backgroundColor"_sv ) |
1003 | || parser.checkColor(attrName: u"selBackgroundColor"_sv ); |
1004 | // clang-format on |
1005 | |
1006 | success = parser.checkIfExtracted(isExtracted); |
1007 | } |
1008 | |
1009 | if (!name.isEmpty()) { |
1010 | const auto len = styleNames.size(); |
1011 | styleNames.insert(value: {.name: name, .line: int(xml.lineNumber())}); |
1012 | if (len == styleNames.size()) { |
1013 | qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name; |
1014 | success = false; |
1015 | } |
1016 | } |
1017 | |
1018 | return success; |
1019 | } |
1020 | }; |
1021 | |
1022 | struct Definition { |
1023 | QMap<QString, Keywords> keywordsList; |
1024 | QMap<QString, Context> contexts; |
1025 | ItemDatas itemDatas; |
1026 | QString firstContextName; |
1027 | const Context *firstContext = nullptr; |
1028 | QString filename; |
1029 | WordDelimiters wordDelimiters; |
1030 | KateVersion kateVersion{}; |
1031 | QString kateVersionStr; |
1032 | QString languageName; |
1033 | QSet<const Definition *> referencedDefinitions; |
1034 | |
1035 | // Parse <keywords ...> |
1036 | bool parseKeywords(const QXmlStreamReader &xml) |
1037 | { |
1038 | wordDelimiters.append(s: xml.attributes().value(qualifiedName: u"additionalDeliminator"_sv )); |
1039 | wordDelimiters.remove(c: xml.attributes().value(qualifiedName: u"weakDeliminator"_sv )); |
1040 | return true; |
1041 | } |
1042 | }; |
1043 | |
1044 | // Parse <context> |
1045 | void processContextElement(const QXmlStreamReader &xml) |
1046 | { |
1047 | Context context; |
1048 | m_success = context.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
1049 | if (m_currentDefinition->firstContextName.isEmpty()) { |
1050 | m_currentDefinition->firstContextName = context.name; |
1051 | } |
1052 | if (m_currentDefinition->contexts.contains(key: context.name)) { |
1053 | qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name; |
1054 | m_success = false; |
1055 | } |
1056 | m_currentContext = &*m_currentDefinition->contexts.insert(key: context.name, value: context); |
1057 | } |
1058 | |
1059 | // Parse <list name="..."> |
1060 | void processListElement(const QXmlStreamReader &xml) |
1061 | { |
1062 | Keywords keywords; |
1063 | m_success = keywords.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
1064 | if (m_currentDefinition->keywordsList.contains(key: keywords.name)) { |
1065 | qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name; |
1066 | m_success = false; |
1067 | } |
1068 | m_currentKeywords = &*m_currentDefinition->keywordsList.insert(key: keywords.name, value: keywords); |
1069 | } |
1070 | |
1071 | const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const |
1072 | { |
1073 | auto it = maxVersionByDefinitions.find(key: &definition); |
1074 | if (it != maxVersionByDefinitions.end()) { |
1075 | return it.value(); |
1076 | } else { |
1077 | auto it = maxVersionByDefinitions.insert(key: &definition, value: &definition); |
1078 | for (const auto &referencedDef : definition.referencedDefinitions) { |
1079 | auto *maxDef = maxKateVersionDefinition(definition: *referencedDef, maxVersionByDefinitions); |
1080 | if (it.value()->kateVersion < maxDef->kateVersion) { |
1081 | it.value() = maxDef; |
1082 | } |
1083 | } |
1084 | return it.value(); |
1085 | } |
1086 | } |
1087 | |
1088 | // Initialize the referenced rules (Rule::includedRules) |
1089 | void resolveIncludeRules() |
1090 | { |
1091 | QSet<const Context *> usedContexts; |
1092 | QList<const Context *> contexts; |
1093 | |
1094 | QMutableMapIterator<QString, Definition> def(m_definitions); |
1095 | while (def.hasNext()) { |
1096 | def.next(); |
1097 | auto &definition = def.value(); |
1098 | QMutableMapIterator<QString, Context> contextIt(definition.contexts); |
1099 | while (contextIt.hasNext()) { |
1100 | contextIt.next(); |
1101 | auto ¤tContext = contextIt.value(); |
1102 | for (auto &rule : currentContext.rules) { |
1103 | if (rule.type != Context::Rule::Type::IncludeRules) { |
1104 | continue; |
1105 | } |
1106 | |
1107 | if (rule.context.stay) { |
1108 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself" ; |
1109 | m_success = false; |
1110 | continue; |
1111 | } |
1112 | |
1113 | if (rule.context.popCount) { |
1114 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix" ; |
1115 | m_success = false; |
1116 | } |
1117 | |
1118 | if (!rule.context.context) { |
1119 | m_success = false; |
1120 | continue; |
1121 | } |
1122 | |
1123 | // resolve includedRules and includedIncludeRules |
1124 | |
1125 | usedContexts.clear(); |
1126 | usedContexts.insert(value: rule.context.context); |
1127 | contexts.clear(); |
1128 | contexts.append(t: rule.context.context); |
1129 | |
1130 | for (int i = 0; i < contexts.size(); ++i) { |
1131 | currentContext.hasDynamicRule = contexts[i]->hasDynamicRule; |
1132 | for (const auto &includedRule : contexts[i]->rules) { |
1133 | if (includedRule.type != Context::Rule::Type::IncludeRules) { |
1134 | rule.includedRules.append(t: &includedRule); |
1135 | } else if (&rule == &includedRule) { |
1136 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity" ; |
1137 | m_success = false; |
1138 | } else { |
1139 | rule.includedIncludeRules.insert(value: &includedRule); |
1140 | |
1141 | if (includedRule.includedRules.isEmpty()) { |
1142 | const auto *context = includedRule.context.context; |
1143 | if (context && !usedContexts.contains(value: context)) { |
1144 | contexts.append(t: context); |
1145 | usedContexts.insert(value: context); |
1146 | } |
1147 | } else { |
1148 | rule.includedRules.append(l: includedRule.includedRules); |
1149 | } |
1150 | } |
1151 | } |
1152 | } |
1153 | } |
1154 | } |
1155 | } |
1156 | } |
1157 | |
1158 | //! Recursively extracts the contexts used from the first context of the definitions. |
1159 | //! This method detects groups of contexts which are only used among themselves. |
//! @return the set of contexts reached by the walk below, over all definitions
//!         that have a non-null firstContext
// NOTE(review): the function identifier is missing between the return type
// and "()" in this text, and the calls below carry "name:" argument labels;
// both look like extraction artefacts - restore from the upstream file.
QSet<const Context *> () const
{
    // usedContexts is shared by all definitions; contexts is the per-definition work list
    QSet<const Context *> usedContexts;
    QList<const Context *> contexts;

    QMapIterator<QString, Definition> def(m_definitions);
    while (def.hasNext()) {
        def.next();
        const auto &definition = def.value();

        if (definition.firstContext) {
            usedContexts.insert(value: definition.firstContext);
            contexts.clear();
            contexts.append(t: definition.firstContext);

            // the list grows while it is iterated: breadth-first traversal
            for (int i = 0; i < contexts.size(); ++i) {
                // queue a context the first time it is seen; null pointers are ignored
                auto appendContext = [&](const Context *context) {
                    if (context && !usedContexts.contains(value: context)) {
                        contexts.append(t: context);
                        usedContexts.insert(value: context);
                    }
                };

                const auto *context = contexts[i];
                // contexts referenced by the context attributes
                appendContext(context->lineEndContext.context);
                appendContext(context->lineEmptyContext.context);
                appendContext(context->fallthroughContext.context);

                // contexts referenced by the rules
                for (auto &rule : context->rules) {
                    appendContext(rule.context.context);
                }
            }
        }
    }

    return usedContexts;
}
1197 | |
1198 | struct RuleAndInclude { |
1199 | const Context::Rule *rule; |
1200 | const Context::Rule *includeRules; |
1201 | |
1202 | explicit operator bool() const |
1203 | { |
1204 | return rule; |
1205 | } |
1206 | }; |
1207 | |
1208 | struct IncludedRuleUnreachableBy { |
1209 | QList<RuleAndInclude> unreachableBy; |
1210 | bool alwaysUnreachable = true; |
1211 | }; |
1212 | |
1213 | //! Check contexts and rules |
1214 | bool checkContexts(const Definition &definition, |
1215 | QSet<ItemDatas::Style> &usedAttributeNames, |
1216 | QSet<ItemDatas::Style> &ignoredAttributeNames, |
1217 | const QSet<const Context *> &usedContexts, |
1218 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const |
1219 | { |
1220 | bool success = true; |
1221 | |
1222 | QMapIterator<QString, Context> contextIt(definition.contexts); |
1223 | while (contextIt.hasNext()) { |
1224 | contextIt.next(); |
1225 | |
1226 | const auto &context = contextIt.value(); |
1227 | const auto &filename = definition.filename; |
1228 | |
1229 | if (!usedContexts.contains(value: &context)) { |
1230 | qWarning() << filename << "line" << context.line << "unused context:" << context.name; |
1231 | success = false; |
1232 | continue; |
1233 | } |
1234 | |
1235 | if (context.name.startsWith(s: u"#pop"_sv )) { |
1236 | qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name; |
1237 | success = false; |
1238 | } |
1239 | |
1240 | if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) { |
1241 | usedAttributeNames.insert(value: {.name: context.attribute, .line: context.line}); |
1242 | } |
1243 | |
1244 | success = checkContextAttribute(definition, context) && success; |
1245 | success = checkUreachableRules(filename: definition.filename, context, unreachableIncludedRules) && success; |
1246 | success = suggestRuleMerger(filename: definition.filename, context) && success; |
1247 | |
1248 | for (const auto &rule : context.rules) { |
1249 | if (!rule.attribute.isEmpty()) { |
1250 | if (rule.lookAhead != XmlBool::True) { |
1251 | usedAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line}); |
1252 | } else { |
1253 | ignoredAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line}); |
1254 | } |
1255 | } |
1256 | success = checkLookAhead(rule) && success; |
1257 | success = checkStringDetect(rule) && success; |
1258 | success = checkWordDetect(rule) && success; |
1259 | success = checkKeyword(definition, rule) && success; |
1260 | success = checkRegExpr(filename, rule, context) && success; |
1261 | success = checkDelimiters(definition, rule) && success; |
1262 | } |
1263 | } |
1264 | |
1265 | return success; |
1266 | } |
1267 | |
1268 | //! Check that a regular expression in a RegExpr rule: |
1269 | //! - isValid() |
1270 | //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. |
1271 | //! - dynamic=true but no place holder used? |
1272 | //! - is not . with lookAhead="1" |
//! - is not ^... without column or firstNonSpace attribute
1274 | //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar |
1275 | //! - has no unused captures |
1276 | //! - has no unnecessary quantifier with lookAhead |
1277 | bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const |
1278 | { |
1279 | // ignore empty regex because the error is raised during xml parsing |
1280 | if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) { |
1281 | const QRegularExpression regexp(rule.string); |
1282 | if (!checkRegularExpression(filename: rule.filename, regexp, line: rule.line)) { |
1283 | return false; |
1284 | } |
1285 | |
1286 | // dynamic == true and no place holder? |
1287 | if (rule.dynamic == XmlBool::True) { |
1288 | static const QRegularExpression placeHolder(QStringLiteral("%\\d+" )); |
1289 | if (!rule.string.contains(re: placeHolder)) { |
1290 | qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder" ; |
1291 | return false; |
1292 | } |
1293 | } |
1294 | |
1295 | if (rule.lookAhead == XmlBool::True && (rule.string.endsWith(s: u".*$"_sv ) || rule.string.endsWith(s: u".*"_sv )) && -1 == rule.string.indexOf(ch: u'|')) { |
1296 | qWarning() << rule.filename << "line" << rule.line << "RegExpr with lookAhead=1 doesn't need to end with '.*' or '.*$':" << rule.string; |
1297 | return false; |
1298 | } |
1299 | |
1300 | auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string; |
1301 | if (rule.lookAhead == XmlBool::True) { |
1302 | static const QRegularExpression removeAllSuffix(QStringLiteral( |
1303 | R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)" )); |
1304 | reg.replace(re: removeAllSuffix, after: QString()); |
1305 | } |
1306 | |
1307 | reg.replace(QStringLiteral("{1}" ), after: QString()); |
1308 | reg.replace(QStringLiteral("{1,1}" ), after: QString()); |
1309 | |
1310 | // is DetectSpaces |
1311 | // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, + |
1312 | static const QRegularExpression isDetectSpaces( |
1313 | QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)" )); |
1314 | if (rule.string.contains(re: isDetectSpaces)) { |
1315 | char const * = rule.string.contains(c: u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "" ; |
1316 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":" |
1317 | << rule.string; |
1318 | return false; |
1319 | } |
1320 | |
1321 | #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))" |
1322 | #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])" |
1323 | |
1324 | // is RangeDetect |
1325 | static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:" |
1326 | "\\.\\*[?+]?" REG_CHAR "|" |
1327 | "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1" |
1328 | ")$" )); |
1329 | if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(s: u".*?"_sv ) || rule.string.contains(s: u"[^"_sv )) |
1330 | && reg.contains(re: isRange)) { |
1331 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string; |
1332 | return false; |
1333 | } |
1334 | |
1335 | // is AnyChar |
1336 | static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)" )); |
1337 | if (rule.string.contains(re: isAnyChar)) { |
1338 | auto = (reg[0] == u'^' || reg[1] == u'^') ? "with column=\"0\"" : "" ; |
1339 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra; |
1340 | return false; |
1341 | } |
1342 | |
1343 | // is LineContinue |
1344 | static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$" )); |
1345 | if (reg.contains(re: isLineContinue)) { |
1346 | auto = (reg[0] == u'^') ? "with column=\"0\"" : "" ; |
1347 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra; |
1348 | return false; |
1349 | } |
1350 | |
1351 | #define REG_DIGIT uR"((\[(0-9|\\d)\]|\\d))" |
1352 | #define REG_DIGITS REG_DIGIT u"([+]|" REG_DIGIT u"[*])" |
1353 | #define REG_DOT uR"((\\[.]|\[.\]))" |
1354 | // is Int, check \b[0-9]+ |
1355 | static const QRegularExpression isInt(uR"(^(\((\?:)?)*\\b(\((\?:)?)*)" REG_DIGITS uR"(\)*$)"_s ); |
1356 | if (reg.contains(re: isInt)) { |
1357 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Int:" << rule.string; |
1358 | return false; |
1359 | } |
1360 | |
1361 | // is Float, check (\b[0-9]+\.[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)? |
1362 | static const QRegularExpression isFloat( |
1363 | uR"(^(\\b|\((\?:)?)*)" REG_DIGITS REG_DOT |
1364 | REG_DIGIT u"[*][|]" REG_DOT REG_DIGITS uR"(\)+\((\?:)?\[[eE]+\]\[(\\?-\\?\+|\\?\+\\?-)\]\?)" REG_DIGITS uR"(\)\?\)*$)"_s ); |
1365 | if (reg.contains(re: isFloat)) { |
1366 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by Float:" << rule.string; |
1367 | return false; |
1368 | } |
1369 | #undef REG_DOT |
1370 | #undef REG_DIGIT |
1371 | #undef REG_DIGITS |
1372 | |
1373 | // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _ |
1374 | static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR)); |
1375 | reg.replace(re: sanitize1, QStringLiteral("_" )); |
1376 | |
1377 | #undef REG_CHAR |
1378 | #undef REG_ESCAPE_CHAR |
1379 | |
1380 | // use minimal or lazy operator |
1381 | static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]" )); |
1382 | static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]" )); |
1383 | |
1384 | if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: isMinimal) && !reg.contains(re: hasNotGreedy) |
1385 | && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0) |
1386 | && (reg.back() != u'$' || reg.contains(c: u'|'))) { |
1387 | qWarning() << rule.filename << "line" << rule.line |
1388 | << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string; |
1389 | return false; |
1390 | } |
1391 | |
1392 | // replace [:...:] with ___ |
1393 | static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])" )); |
1394 | reg.replace(re: sanitize2, QStringLiteral("___" )); |
1395 | |
1396 | // replace [ccc...], [special] with ... |
1397 | static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))" )); |
1398 | reg.replace(re: sanitize3, QStringLiteral("...\\1" )); |
1399 | |
1400 | // replace [c] with _ |
1401 | static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])" )); |
1402 | reg.replace(re: sanitize4, QStringLiteral("_" )); |
1403 | |
1404 | const int len = reg.size(); |
1405 | // replace [cC] with _ |
1406 | static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])" )); |
1407 | reg = reg.toUpper(); |
1408 | reg.replace(re: toInsensitive, after: QString()); |
1409 | |
1410 | // is StringDetect |
1411 | // ignore (?:, ) and {n} |
1412 | static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)" )); |
1413 | if (reg.contains(re: isStringDetect)) { |
1414 | char const * = rule.string.contains(c: u'^') ? "+ column=\"0\" or firstNonSpace=\"1\"" : "" ; |
1415 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg |
1416 | << ":" << rule.string; |
1417 | if (len != reg.size()) { |
1418 | qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string; |
1419 | } |
1420 | return false; |
1421 | } |
1422 | |
1423 | // column="0" |
1424 | if (rule.column == -1) { |
1425 | // ^ without | |
1426 | // (^sas*) -> ok |
1427 | // (^sa|s*) -> ko |
1428 | // (^(sa|s*)) -> ok |
1429 | auto first = std::as_const(t&: reg).begin(); |
1430 | auto last = std::as_const(t&: reg).end(); |
1431 | int depth = 0; |
1432 | |
1433 | while (u'(' == *first) { |
1434 | ++depth; |
1435 | ++first; |
1436 | if (u'?' == *first || u':' == first[1]) { |
1437 | first += 2; |
1438 | } |
1439 | } |
1440 | |
1441 | if (u'^' == *first) { |
1442 | const int bolDepth = depth; |
1443 | bool replace = true; |
1444 | |
1445 | while (++first != last) { |
1446 | if (u'(' == *first) { |
1447 | ++depth; |
1448 | } else if (u')' == *first) { |
1449 | --depth; |
1450 | if (depth < bolDepth) { |
1451 | // (^a)? === (^a|) -> ko |
1452 | if (first + 1 != last && u"*?"_sv .contains(c: first[1])) { |
1453 | replace = false; |
1454 | break; |
1455 | } |
1456 | } |
1457 | } else if (u'|' == *first) { |
1458 | // ignore '|' within subgroup |
1459 | if (depth <= bolDepth) { |
1460 | replace = false; |
1461 | break; |
1462 | } |
1463 | } |
1464 | } |
1465 | |
1466 | if (replace) { |
1467 | qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string; |
1468 | return false; |
1469 | } |
1470 | } |
1471 | } |
1472 | |
1473 | // add ^ with column=0 |
1474 | if (rule.column == 0 && !rule.isDotRegex) { |
1475 | bool hasStartOfLine = false; |
1476 | auto first = std::as_const(t&: reg).begin(); |
1477 | auto last = std::as_const(t&: reg).end(); |
1478 | for (; first != last; ++first) { |
1479 | if (*first == u'^') { |
1480 | hasStartOfLine = true; |
1481 | break; |
1482 | } else if (*first == u'(') { |
1483 | if (last - first >= 3 && first[1] == u'?' && first[2] == u':') { |
1484 | first += 2; |
1485 | } |
1486 | } else { |
1487 | break; |
1488 | } |
1489 | } |
1490 | |
1491 | if (!hasStartOfLine) { |
1492 | qWarning() << rule.filename << "line" << rule.line |
1493 | << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string; |
1494 | return false; |
1495 | } |
1496 | } |
1497 | |
1498 | bool useCapture = false; |
1499 | |
1500 | // detection of unnecessary capture |
1501 | if (regexp.captureCount()) { |
1502 | auto maximalCapture = [](const QStringView(&referenceNames)[9], const QString &s) { |
1503 | int maxCapture = 9; |
1504 | while (maxCapture && !s.contains(s: referenceNames[maxCapture - 1])) { |
1505 | --maxCapture; |
1506 | } |
1507 | return maxCapture; |
1508 | }; |
1509 | |
1510 | int maxCaptureUsed = 0; |
1511 | // maximal dynamic reference |
1512 | if (rule.context.context && !rule.context.stay) { |
1513 | for (const auto &nextRule : std::as_const(t&: rule.context.context->rules)) { |
1514 | if (nextRule.dynamic == XmlBool::True) { |
1515 | static const QStringView cap[]{ |
1516 | u"%1"_sv , |
1517 | u"%2"_sv , |
1518 | u"%3"_sv , |
1519 | u"%4"_sv , |
1520 | u"%5"_sv , |
1521 | u"%6"_sv , |
1522 | u"%7"_sv , |
1523 | u"%8"_sv , |
1524 | u"%9"_sv , |
1525 | }; |
1526 | int maxDynamicCapture = maximalCapture(cap, nextRule.string); |
1527 | maxCaptureUsed = std::max(a: maxCaptureUsed, b: maxDynamicCapture); |
1528 | } |
1529 | } |
1530 | } |
1531 | |
1532 | static const QStringView num1[]{ |
1533 | u"\\1"_sv , |
1534 | u"\\2"_sv , |
1535 | u"\\3"_sv , |
1536 | u"\\4"_sv , |
1537 | u"\\5"_sv , |
1538 | u"\\6"_sv , |
1539 | u"\\7"_sv , |
1540 | u"\\8"_sv , |
1541 | u"\\9"_sv , |
1542 | }; |
1543 | static const QStringView num2[]{ |
1544 | u"\\g1"_sv , |
1545 | u"\\g2"_sv , |
1546 | u"\\g3"_sv , |
1547 | u"\\g4"_sv , |
1548 | u"\\g5"_sv , |
1549 | u"\\g6"_sv , |
1550 | u"\\g7"_sv , |
1551 | u"\\g8"_sv , |
1552 | u"\\g9"_sv , |
1553 | }; |
1554 | const int maxBackReference = std::max(a: maximalCapture(num1, rule.string), b: maximalCapture(num2, rule.string)); |
1555 | |
1556 | const int maxCapture = std::max(a: maxCaptureUsed, b: maxBackReference); |
1557 | |
1558 | if (maxCapture && regexp.captureCount() > maxCapture) { |
1559 | qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture |
1560 | << "are used. Please, replace '(...)' with '(?:...)':" << rule.string; |
1561 | return false; |
1562 | } |
1563 | |
1564 | useCapture = maxCapture; |
1565 | } |
1566 | |
1567 | if (!useCapture) { |
1568 | // is DetectIdentifier |
1569 | static const QRegularExpression isDetectIdentifier( |
1570 | QStringLiteral(R"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)" )); |
1571 | if (rule.string.contains(re: isDetectIdentifier)) { |
1572 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string; |
1573 | return false; |
1574 | } |
1575 | } |
1576 | |
1577 | if (rule.isDotRegex) { |
1578 | // search next rule with same column or firstNonSpace |
1579 | int i = &rule - context.rules.data() + 1; |
1580 | const bool hasColumn = (rule.column != -1); |
1581 | const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True); |
1582 | const bool isSpecial = (hasColumn || hasFirstNonSpace); |
1583 | for (; i < context.rules.size(); ++i) { |
1584 | auto &rule2 = context.rules[i]; |
1585 | if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) { |
1586 | i = context.rules.size(); |
1587 | break; |
1588 | } |
1589 | |
1590 | const bool hasColumn2 = (rule2.column != -1); |
1591 | const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True); |
1592 | if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column) |
1593 | || (hasFirstNonSpace && hasFirstNonSpace2)) { |
1594 | break; |
1595 | } |
1596 | } |
1597 | |
1598 | auto ruleFilename = (filename == rule.filename) ? QString() : u"in "_sv + rule.filename; |
1599 | if (i == context.rules.size()) { |
1600 | if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty() |
1601 | && rule.endRegion.isEmpty() && !useCapture) { |
1602 | qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename |
1603 | << "should be replaced by fallthroughContext:" << rule.string; |
1604 | } |
1605 | } else { |
1606 | auto &nextRule = context.rules[i]; |
1607 | auto nextRuleFilename = (filename == nextRule.filename) ? QString() : u"in "_sv + nextRule.filename; |
1608 | qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename |
1609 | << "because a dot RegExpr is used line" << rule.line << ruleFilename; |
1610 | } |
1611 | |
1612 | // unnecessary quantifier |
1613 | static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)" )); |
1614 | static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)" )); |
1615 | auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2; |
1616 | if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: unnecessaryQuantifier)) { |
1617 | qWarning() << rule.filename << "line" << rule.line |
1618 | << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string; |
1619 | return false; |
1620 | } |
1621 | } |
1622 | } |
1623 | |
1624 | return true; |
1625 | } |
1626 | |
1627 | // Parse and check <emptyLine> |
1628 | bool parseEmptyLine(const QString &filename, const QXmlStreamReader &xml) |
1629 | { |
1630 | bool success = true; |
1631 | |
1632 | QString pattern; |
1633 | XmlBool casesensitive{}; |
1634 | |
1635 | const auto attrs = xml.attributes(); |
1636 | for (auto &attr : attrs) { |
1637 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
1638 | |
1639 | const bool = parser.extractString(str&: pattern, attrName: u"regexpr"_sv ) || parser.extractXmlBool(xmlBool&: casesensitive, attrName: u"casesensitive"_sv ); |
1640 | |
1641 | success = parser.checkIfExtracted(isExtracted); |
1642 | } |
1643 | |
1644 | if (pattern.isEmpty()) { |
1645 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr" ; |
1646 | success = false; |
1647 | } else { |
1648 | success = checkRegularExpression(filename, regexp: QRegularExpression(pattern), line: xml.lineNumber()); |
1649 | } |
1650 | |
1651 | return success; |
1652 | } |
1653 | |
1654 | //! Check that a regular expression: |
1655 | //! - isValid() |
1656 | //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. |
1657 | bool checkRegularExpression(const QString &filename, const QRegularExpression ®exp, int line) const |
1658 | { |
1659 | const auto pattern = regexp.pattern(); |
1660 | |
1661 | // validate regexp |
1662 | if (!regexp.isValid()) { |
1663 | qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset" |
1664 | << regexp.patternErrorOffset(); |
1665 | return false; |
1666 | } |
1667 | |
1668 | // catch possible case typos: [A-z] or [a-Z] |
1669 | const int azOffset = std::max(a: pattern.indexOf(s: u"A-z"_sv ), b: pattern.indexOf(s: u"a-Z"_sv )); |
1670 | if (azOffset >= 0) { |
1671 | qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset; |
1672 | return false; |
1673 | } |
1674 | |
1675 | return true; |
1676 | } |
1677 | |
1678 | //! Check fallthrough and fallthroughContext. |
1679 | //! Check kateversion for stopEmptyLineContextSwitchLoop. |
1680 | bool checkContextAttribute(const Definition &definition, const Context &context) const |
1681 | { |
1682 | bool success = true; |
1683 | |
1684 | if (!context.fallthroughContext.name.isEmpty()) { |
1685 | const bool mandatoryFallthroughAttribute = definition.kateVersion < KateVersion{5, 62}; |
1686 | if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) { |
1687 | qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context" |
1688 | << context.name; |
1689 | success = false; |
1690 | } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) { |
1691 | qWarning() << definition.filename << "line" << context.line |
1692 | << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context" |
1693 | << context.name; |
1694 | success = false; |
1695 | } |
1696 | } |
1697 | |
1698 | if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < KateVersion{5, 103}) { |
1699 | qWarning() << definition.filename << "line" << context.line |
1700 | << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name; |
1701 | success = false; |
1702 | } |
1703 | |
1704 | return success; |
1705 | } |
1706 | |
1707 | //! Search for additionalDeliminator/weakDeliminator which has no effect. |
1708 | bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const |
1709 | { |
1710 | if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) { |
1711 | return true; |
1712 | } |
1713 | |
1714 | bool success = true; |
1715 | |
1716 | if (definition.kateVersion < KateVersion{5, 79}) { |
1717 | qWarning() << definition.filename << "line" << rule.line |
1718 | << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion." ; |
1719 | success = false; |
1720 | } |
1721 | |
1722 | for (QChar c : rule.additionalDeliminator) { |
1723 | if (!definition.wordDelimiters.contains(c)) { |
1724 | return success; |
1725 | } |
1726 | } |
1727 | |
1728 | for (QChar c : rule.weakDeliminator) { |
1729 | if (definition.wordDelimiters.contains(c)) { |
1730 | return success; |
1731 | } |
1732 | } |
1733 | |
1734 | qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string; |
1735 | return false; |
1736 | } |
1737 | |
1738 | //! Check that keyword rule reference an existing keyword list. |
1739 | bool checkKeyword(const Definition &definition, const Context::Rule &rule) const |
1740 | { |
1741 | if (rule.type == Context::Rule::Type::keyword) { |
1742 | auto it = definition.keywordsList.find(key: rule.string); |
1743 | if (it == definition.keywordsList.end()) { |
1744 | qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string; |
1745 | return false; |
1746 | } |
1747 | } |
1748 | return true; |
1749 | } |
1750 | |
1751 | //! Search for rules with lookAhead="true" and context="#stay". |
1752 | //! This would cause an infinite loop. |
1753 | bool (const Context::Rule &rule) const |
1754 | { |
1755 | if (rule.lookAhead == XmlBool::True && rule.context.stay) { |
1756 | qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay" ; |
1757 | } |
1758 | return true; |
1759 | } |
1760 | |
1761 | //! Check that StringDetect contains a placeHolder when dynamic="1" |
1762 | bool checkStringDetect(const Context::Rule &rule) const |
1763 | { |
1764 | if (rule.type == Context::Rule::Type::StringDetect) { |
1765 | // dynamic == true and no place holder? |
1766 | if (rule.dynamic == XmlBool::True) { |
1767 | static const QRegularExpression placeHolder(QStringLiteral("%\\d+" )); |
1768 | if (!rule.string.contains(re: placeHolder)) { |
1769 | qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder" ; |
1770 | return false; |
1771 | } |
1772 | } |
1773 | } |
1774 | return true; |
1775 | } |
1776 | |
1777 | //! Check that WordDetect does not contain spaces at the beginning and end of text. |
1778 | bool checkWordDetect(const Context::Rule &rule) const |
1779 | { |
1780 | if (rule.type == Context::Rule::Type::WordDetect) { |
1781 | if (!rule.string.isEmpty() && (rule.string.front().isSpace() || rule.string.back().isSpace())) { |
1782 | qWarning() << rule.filename << "line" << rule.line << "contains a space at the beginning or end of the string:" << rule.string; |
1783 | return false; |
1784 | } |
1785 | } |
1786 | return true; |
1787 | } |
1788 | |
1789 | //! Check \<include> and delimiter in a keyword list |
1790 | bool checkKeywordsList(const Definition &definition) const |
1791 | { |
1792 | bool success = true; |
1793 | |
1794 | bool includeNotSupport = (definition.kateVersion < KateVersion{5, 53}); |
1795 | QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList); |
1796 | while (keywordsIt.hasNext()) { |
1797 | keywordsIt.next(); |
1798 | |
1799 | for (const auto &include : keywordsIt.value().items.includes) { |
1800 | if (includeNotSupport) { |
1801 | qWarning() << definition.filename << "line" << include.line |
1802 | << "<include> is only available since version \"5.53\". Please, increase kateversion." ; |
1803 | success = false; |
1804 | } |
1805 | success = checkKeywordInclude(definition, include) && success; |
1806 | } |
1807 | |
1808 | // Check that keyword list items do not have deliminator character |
1809 | #if 0 |
1810 | for (const auto& keyword : keywordsIt.value().items.keywords) { |
1811 | for (QChar c : keyword.content) { |
1812 | if (definition.wordDelimiters.contains(c)) { |
1813 | qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content; |
1814 | success = false; |
1815 | } |
1816 | } |
1817 | } |
1818 | #endif |
1819 | } |
1820 | |
1821 | return success; |
1822 | } |
1823 | |
1824 | //! Search for non-existing keyword include. |
1825 | bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const |
1826 | { |
1827 | bool containsKeywordName = true; |
1828 | int const idx = include.content.indexOf(s: u"##"_sv ); |
1829 | if (idx == -1) { |
1830 | auto it = definition.keywordsList.find(key: include.content); |
1831 | containsKeywordName = (it != definition.keywordsList.end()); |
1832 | } else { |
1833 | auto defName = include.content.sliced(pos: idx + 2); |
1834 | auto listName = include.content.sliced(pos: 0, n: idx); |
1835 | auto it = m_definitions.find(key: defName); |
1836 | if (it == m_definitions.end()) { |
1837 | qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content; |
1838 | return false; |
1839 | } |
1840 | containsKeywordName = it->keywordsList.contains(key: listName); |
1841 | } |
1842 | |
1843 | if (!containsKeywordName) { |
1844 | qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content; |
1845 | } |
1846 | |
1847 | return containsKeywordName; |
1848 | } |
1849 | |
1850 | //! Check if a rule is hidden by another |
1851 | //! - rule hidden by DetectChar or AnyChar |
1852 | //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar |
1853 | //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings |
1854 | //! - duplicate rule (Int, Float, keyword with same String, etc) |
1855 | //! - Rule hidden by a dot regex |
1856 | bool checkUreachableRules(const QString &filename, |
1857 | const Context &context, |
1858 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const |
1859 | { |
1860 | if (context.isOnlyIncluded) { |
1861 | return true; |
1862 | } |
1863 | |
1864 | struct Rule4 { |
1865 | RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr) |
1866 | { |
1867 | auto set = [&](RuleAndInclude &ruleAndInclude) { |
1868 | auto old = ruleAndInclude; |
1869 | ruleAndInclude = {.rule: &rule, .includeRules: includeRules}; |
1870 | return old; |
1871 | }; |
1872 | |
1873 | if (rule.firstNonSpace == XmlBool::True) { |
1874 | return set(firstNonSpace); |
1875 | } else if (rule.column == 0) { |
1876 | return set(column0); |
1877 | } else if (rule.column > 0) { |
1878 | return set(columnGreaterThan0[rule.column]); |
1879 | } else { |
1880 | return set(normal); |
1881 | } |
1882 | } |
1883 | |
1884 | private: |
1885 | RuleAndInclude normal; |
1886 | RuleAndInclude column0; |
1887 | QMap<int, RuleAndInclude> columnGreaterThan0; |
1888 | RuleAndInclude firstNonSpace; |
1889 | }; |
1890 | |
1891 | // Associate QChar with RuleAndInclude |
1892 | struct CharTable { |
1893 | /// Search RuleAndInclude associated with @p c. |
1894 | RuleAndInclude find(QChar c) const |
1895 | { |
1896 | if (c.unicode() < 128) { |
1897 | return m_asciiMap[c.unicode()]; |
1898 | } |
1899 | auto it = m_utf8Map.find(key: c); |
1900 | return it == m_utf8Map.end() ? RuleAndInclude{.rule: nullptr, .includeRules: nullptr} : it.value(); |
1901 | } |
1902 | |
1903 | /// Search RuleAndInclude associated with the characters of @p s. |
1904 | /// \return an empty QList when at least one character is not found. |
1905 | QList<RuleAndInclude> find(QStringView s) const |
1906 | { |
1907 | QList<RuleAndInclude> result; |
1908 | |
1909 | for (QChar c : s) { |
1910 | if (!find(c)) { |
1911 | return result; |
1912 | } |
1913 | } |
1914 | |
1915 | for (QChar c : s) { |
1916 | result.append(t: find(c)); |
1917 | } |
1918 | |
1919 | return result; |
1920 | } |
1921 | |
1922 | /// Associates @p c with a rule. |
1923 | void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1924 | { |
1925 | if (c.unicode() < 128) { |
1926 | m_asciiMap[c.unicode()] = {.rule: &rule, .includeRules: includeRule}; |
1927 | } else { |
1928 | m_utf8Map[c] = {.rule: &rule, .includeRules: includeRule}; |
1929 | } |
1930 | } |
1931 | |
1932 | /// Associates each character of @p s with a rule. |
1933 | void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1934 | { |
1935 | for (QChar c : s) { |
1936 | append(c, rule, includeRule); |
1937 | } |
1938 | } |
1939 | |
1940 | private: |
1941 | RuleAndInclude m_asciiMap[127]{}; |
1942 | QMap<QChar, RuleAndInclude> m_utf8Map; |
1943 | }; |
1944 | |
1945 | struct Char4Tables { |
1946 | CharTable chars; |
1947 | CharTable charsColumn0; |
1948 | QMap<int, CharTable> charsColumnGreaterThan0; |
1949 | CharTable charsFirstNonSpace; |
1950 | }; |
1951 | |
1952 | // View on Char4Tables members |
1953 | struct CharTableArray { |
1954 | // Append Char4Tables members that satisfies firstNonSpace and column. |
1955 | // Char4Tables::char is always added. |
1956 | CharTableArray(Char4Tables &tables, const Context::Rule &rule) |
1957 | { |
1958 | if (rule.firstNonSpace == XmlBool::True) { |
1959 | appendTable(t&: tables.charsFirstNonSpace); |
1960 | } |
1961 | |
1962 | if (rule.column == 0) { |
1963 | appendTable(t&: tables.charsColumn0); |
1964 | } else if (rule.column > 0) { |
1965 | appendTable(t&: tables.charsColumnGreaterThan0[rule.column]); |
1966 | } |
1967 | |
1968 | appendTable(t&: tables.chars); |
1969 | } |
1970 | |
1971 | // Removes Char4Tables::chars when the rule contains firstNonSpace or column |
1972 | void removeNonSpecialWhenSpecial() |
1973 | { |
1974 | if (m_size > 1) { |
1975 | --m_size; |
1976 | } |
1977 | } |
1978 | |
1979 | /// Search RuleAndInclude associated with @p c. |
1980 | RuleAndInclude find(QChar c) const |
1981 | { |
1982 | for (int i = 0; i < m_size; ++i) { |
1983 | if (auto ruleAndInclude = m_charTables[i]->find(c)) { |
1984 | return ruleAndInclude; |
1985 | } |
1986 | } |
1987 | return RuleAndInclude{.rule: nullptr, .includeRules: nullptr}; |
1988 | } |
1989 | |
1990 | /// Search RuleAndInclude associated with the characters of @p s. |
1991 | /// \return an empty QList when at least one character is not found. |
1992 | QList<RuleAndInclude> find(QStringView s) const |
1993 | { |
1994 | for (int i = 0; i < m_size; ++i) { |
1995 | auto result = m_charTables[i]->find(s); |
1996 | if (result.size()) { |
1997 | while (++i < m_size) { |
1998 | result.append(other: m_charTables[i]->find(s)); |
1999 | } |
2000 | return result; |
2001 | } |
2002 | } |
2003 | return QList<RuleAndInclude>(); |
2004 | } |
2005 | |
2006 | /// Associates @p c with a rule. |
2007 | void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
2008 | { |
2009 | for (int i = 0; i < m_size; ++i) { |
2010 | m_charTables[i]->append(c, rule, includeRule); |
2011 | } |
2012 | } |
2013 | |
2014 | /// Associates each character of @p s with a rule. |
2015 | void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
2016 | { |
2017 | for (int i = 0; i < m_size; ++i) { |
2018 | m_charTables[i]->append(s, rule, includeRule); |
2019 | } |
2020 | } |
2021 | |
2022 | private: |
2023 | void appendTable(CharTable &t) |
2024 | { |
2025 | m_charTables[m_size] = &t; |
2026 | ++m_size; |
2027 | } |
2028 | |
2029 | CharTable *m_charTables[3]; |
2030 | int m_size = 0; |
2031 | }; |
2032 | |
2033 | struct ObservableRule { |
2034 | const Context::Rule *rule; |
2035 | const Context::Rule *includeRules; |
2036 | |
2037 | bool hasResolvedIncludeRules() const |
2038 | { |
2039 | return rule == includeRules; |
2040 | } |
2041 | }; |
2042 | |
2043 | // Iterates over all the rules, including those in includedRules |
2044 | struct RuleIterator { |
2045 | RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule) |
2046 | : m_end(&endRule - rules.data()) |
2047 | , m_rules(rules) |
2048 | { |
2049 | } |
2050 | |
2051 | /// \return next rule or nullptr |
2052 | const Context::Rule *next() |
2053 | { |
2054 | // if in includedRules |
2055 | if (m_includedRules) { |
2056 | ++m_i2; |
2057 | if (m_i2 != m_includedRules->size()) { |
2058 | return (*m_includedRules)[m_i2]; |
2059 | } |
2060 | ++m_i; |
2061 | m_includedRules = nullptr; |
2062 | } |
2063 | |
2064 | // if is a includedRules |
2065 | while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) { |
2066 | if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) { |
2067 | m_i2 = 0; |
2068 | m_includedRules = &m_rules[m_i].rule->includedRules; |
2069 | return (*m_includedRules)[m_i2]; |
2070 | } |
2071 | ++m_i; |
2072 | } |
2073 | |
2074 | if (m_i < m_end) { |
2075 | ++m_i; |
2076 | return m_rules[m_i - 1].rule; |
2077 | } |
2078 | |
2079 | return nullptr; |
2080 | } |
2081 | |
2082 | /// \return current IncludeRules or nullptr |
2083 | const Context::Rule *currentIncludeRules() const |
2084 | { |
2085 | return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules; |
2086 | } |
2087 | |
2088 | private: |
2089 | int m_i = 0; |
2090 | int m_i2 = 0; |
2091 | const int m_end; |
2092 | const QList<ObservableRule> &m_rules; |
2093 | const QList<const Context::Rule *> *m_includedRules = nullptr; |
2094 | }; |
2095 | |
2096 | // Dot regex container that satisfies firstNonSpace and column. |
2097 | struct DotRegex { |
2098 | /// Append a dot regex rule. |
2099 | void append(const Context::Rule &rule, const Context::Rule *includedRule) |
2100 | { |
2101 | auto array = extractDotRegexes(rule); |
2102 | if (array[0]) { |
2103 | *array[0] = {.rule: &rule, .includeRules: includedRule}; |
2104 | } |
2105 | if (array[1]) { |
2106 | *array[1] = {.rule: &rule, .includeRules: includedRule}; |
2107 | } |
2108 | } |
2109 | |
2110 | /// Search dot regex which hides @p rule |
2111 | RuleAndInclude find(const Context::Rule &rule) |
2112 | { |
2113 | auto array = extractDotRegexes(rule); |
2114 | if (array[0]) { |
2115 | return *array[0]; |
2116 | } |
2117 | if (array[1]) { |
2118 | return *array[1]; |
2119 | } |
2120 | return RuleAndInclude{}; |
2121 | } |
2122 | |
2123 | private: |
2124 | using Array = std::array<RuleAndInclude *, 2>; |
2125 | |
2126 | Array (const Context::Rule &rule) |
2127 | { |
2128 | Array ret{}; |
2129 | |
2130 | if (rule.firstNonSpace != XmlBool::True && rule.column == -1) { |
2131 | ret[0] = &dotRegex; |
2132 | } else { |
2133 | if (rule.firstNonSpace == XmlBool::True) { |
2134 | ret[0] = &dotRegexFirstNonSpace; |
2135 | } |
2136 | |
2137 | if (rule.column == 0) { |
2138 | ret[1] = &dotRegexColumn0; |
2139 | } else if (rule.column > 0) { |
2140 | ret[1] = &dotRegexColumnGreaterThan0[rule.column]; |
2141 | } |
2142 | } |
2143 | |
2144 | return ret; |
2145 | } |
2146 | |
2147 | RuleAndInclude dotRegex{}; |
2148 | RuleAndInclude dotRegexColumn0{}; |
2149 | QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{}; |
2150 | RuleAndInclude dotRegexFirstNonSpace{}; |
2151 | }; |
2152 | |
2153 | bool success = true; |
2154 | |
2155 | // characters of DetectChar/AnyChar |
2156 | Char4Tables detectChars; |
2157 | // characters of dynamic DetectChar |
2158 | Char4Tables dynamicDetectChars; |
2159 | // characters of LineContinue |
2160 | Char4Tables lineContinueChars; |
2161 | |
2162 | Rule4 intRule{}; |
2163 | Rule4 floatRule{}; |
2164 | Rule4 hlCCharRule{}; |
2165 | Rule4 hlCOctRule{}; |
2166 | Rule4 hlCHexRule{}; |
2167 | Rule4 hlCStringCharRule{}; |
2168 | Rule4 detectIdentifierRule{}; |
2169 | |
2170 | // Contains includedRules and included includedRules |
2171 | QMap<Context const *, RuleAndInclude> includeContexts; |
2172 | |
2173 | DotRegex dotRegex; |
2174 | |
2175 | QList<ObservableRule> observedRules; |
2176 | observedRules.reserve(asize: context.rules.size()); |
2177 | for (const Context::Rule &rule : context.rules) { |
2178 | const Context::Rule *includeRule = nullptr; |
2179 | if (rule.type == Context::Rule::Type::IncludeRules) { |
2180 | auto *context = rule.context.context; |
2181 | if (context && context->isOnlyIncluded) { |
2182 | includeRule = &rule; |
2183 | } |
2184 | } |
2185 | |
2186 | observedRules.push_back(t: {.rule: &rule, .includeRules: includeRule}); |
2187 | if (includeRule) { |
2188 | for (const Context::Rule *rule2 : rule.includedRules) { |
2189 | observedRules.push_back(t: {.rule: rule2, .includeRules: includeRule}); |
2190 | } |
2191 | } |
2192 | } |
2193 | |
2194 | for (auto &observedRule : observedRules) { |
2195 | const Context::Rule &rule = *observedRule.rule; |
2196 | bool isUnreachable = false; |
2197 | QList<RuleAndInclude> unreachableBy; |
2198 | |
2199 | // declare rule as unreachable if ruleAndInclude is not empty |
2200 | auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) { |
2201 | if (ruleAndInclude) { |
2202 | isUnreachable = true; |
2203 | unreachableBy.append(t: ruleAndInclude); |
2204 | } |
2205 | }; |
2206 | |
2207 | // declare rule as unreachable if ruleAndIncludes is not empty |
2208 | auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) { |
2209 | if (!ruleAndIncludes.isEmpty()) { |
2210 | isUnreachable = true; |
2211 | unreachableBy.append(l: ruleAndIncludes); |
2212 | } |
2213 | }; |
2214 | |
2215 | // check if rule2.firstNonSpace/column is compatible with those of rule |
2216 | auto isCompatible = [&rule](Context::Rule const &rule2) { |
2217 | return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1) |
2218 | || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True); |
2219 | }; |
2220 | |
2221 | updateUnreachable1(dotRegex.find(rule)); |
2222 | |
2223 | switch (rule.type) { |
2224 | // checks if hidden by DetectChar/AnyChar |
2225 | // then add the characters to detectChars |
2226 | case Context::Rule::Type::AnyChar: { |
2227 | auto tables = CharTableArray(detectChars, rule); |
2228 | updateUnreachable2(tables.find(s: rule.string)); |
2229 | tables.removeNonSpecialWhenSpecial(); |
2230 | tables.append(s: rule.string, rule); |
2231 | break; |
2232 | } |
2233 | |
2234 | // check if is hidden by DetectChar/AnyChar |
2235 | // then add the characters to detectChars or dynamicDetectChars |
2236 | case Context::Rule::Type::DetectChar: { |
2237 | auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; |
2238 | auto tables = CharTableArray(chars4, rule); |
2239 | updateUnreachable1(tables.find(c: rule.char0)); |
2240 | tables.removeNonSpecialWhenSpecial(); |
2241 | tables.append(c: rule.char0, rule); |
2242 | break; |
2243 | } |
2244 | |
2245 | // check if hidden by DetectChar/AnyChar |
2246 | // then add spaces characters to detectChars |
2247 | case Context::Rule::Type::DetectSpaces: { |
2248 | auto tables = CharTableArray(detectChars, rule); |
2249 | updateUnreachable2(tables.find(s: u" \t"_sv )); |
2250 | tables.removeNonSpecialWhenSpecial(); |
2251 | tables.append(c: u' ', rule); |
2252 | tables.append(c: u'\t', rule); |
2253 | break; |
2254 | } |
2255 | |
2256 | // check if hidden by DetectChar/AnyChar |
2257 | case Context::Rule::Type::HlCChar: |
2258 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'\'')); |
2259 | updateUnreachable1(hlCCharRule.setRule(rule)); |
2260 | break; |
2261 | |
2262 | // check if hidden by DetectChar/AnyChar |
2263 | case Context::Rule::Type::HlCHex: |
2264 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'0')); |
2265 | updateUnreachable1(hlCHexRule.setRule(rule)); |
2266 | break; |
2267 | |
2268 | // check if hidden by DetectChar/AnyChar |
2269 | case Context::Rule::Type::HlCOct: |
2270 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'0')); |
2271 | updateUnreachable1(hlCOctRule.setRule(rule)); |
2272 | break; |
2273 | |
2274 | // check if hidden by DetectChar/AnyChar |
2275 | case Context::Rule::Type::HlCStringChar: |
2276 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: u'\\')); |
2277 | updateUnreachable1(hlCStringCharRule.setRule(rule)); |
2278 | break; |
2279 | |
2280 | // check if hidden by DetectChar/AnyChar |
2281 | case Context::Rule::Type::Int: |
2282 | updateUnreachable2(CharTableArray(detectChars, rule).find(s: u"0123456789"_sv )); |
2283 | updateUnreachable1(intRule.setRule(rule)); |
2284 | break; |
2285 | |
2286 | // check if hidden by DetectChar/AnyChar |
2287 | case Context::Rule::Type::Float: |
2288 | updateUnreachable2(CharTableArray(detectChars, rule).find(s: u"0123456789."_sv )); |
2289 | updateUnreachable1(floatRule.setRule(rule)); |
2290 | // check that Float is before Int |
2291 | updateUnreachable1(Rule4(intRule).setRule(rule)); |
2292 | break; |
2293 | |
2294 | // check if hidden by another DetectIdentifier rule |
2295 | case Context::Rule::Type::DetectIdentifier: |
2296 | updateUnreachable1(detectIdentifierRule.setRule(rule)); |
2297 | break; |
2298 | |
2299 | // check if hidden by DetectChar/AnyChar or another LineContinue |
2300 | case Context::Rule::Type::LineContinue: { |
2301 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0)); |
2302 | |
2303 | auto tables = CharTableArray(lineContinueChars, rule); |
2304 | updateUnreachable1(tables.find(c: rule.char0)); |
2305 | tables.removeNonSpecialWhenSpecial(); |
2306 | tables.append(c: rule.char0, rule); |
2307 | break; |
2308 | } |
2309 | |
2310 | // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect |
2311 | case Context::Rule::Type::Detect2Chars: |
2312 | case Context::Rule::Type::RangeDetect: |
2313 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0)); |
2314 | if (!isUnreachable) { |
2315 | RuleIterator ruleIterator(observedRules, observedRule); |
2316 | while (const auto *rulePtr = ruleIterator.next()) { |
2317 | if (isUnreachable) { |
2318 | break; |
2319 | } |
2320 | const auto &rule2 = *rulePtr; |
2321 | if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) { |
2322 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2323 | } |
2324 | } |
2325 | } |
2326 | break; |
2327 | |
2328 | case Context::Rule::Type::RegExpr: { |
2329 | if (rule.isDotRegex) { |
2330 | dotRegex.append(rule, includedRule: nullptr); |
2331 | break; |
2332 | } |
2333 | |
2334 | // check that `rule` does not have another RegExpr as a prefix |
2335 | RuleIterator ruleIterator(observedRules, observedRule); |
2336 | while (const auto *rulePtr = ruleIterator.next()) { |
2337 | if (isUnreachable) { |
2338 | break; |
2339 | } |
2340 | const auto &rule2 = *rulePtr; |
2341 | if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive |
2342 | && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(s: rule2.sanitizedString)) { |
2343 | bool add = (rule.sanitizedString.startsWith(s: rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2); |
2344 | if (!add) { |
2345 | // \s.* (sanitized = \s) is considered hiding \s*\S |
2346 | // we check the quantifiers to see if this is the case |
2347 | auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode(); |
2348 | auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode(); |
2349 | auto c3 = rule2.sanitizedString.back().unicode(); |
2350 | if (c3 == '*' || c3 == '?' || c3 == '+') { |
2351 | add = true; |
2352 | } else if (c1 == '*' || c1 == '?') { |
2353 | add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3)); |
2354 | } else { |
2355 | add = true; |
2356 | } |
2357 | } |
2358 | if (add) { |
2359 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2360 | } |
2361 | } |
2362 | } |
2363 | |
2364 | Q_FALLTHROUGH(); |
2365 | } |
2366 | // check if a rule does not have another rule as a prefix |
2367 | case Context::Rule::Type::WordDetect: |
2368 | case Context::Rule::Type::StringDetect: { |
2369 | // check that dynamic `rule` does not have another dynamic StringDetect as a prefix |
2370 | if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) { |
2371 | RuleIterator ruleIterator(observedRules, observedRule); |
2372 | while (const auto *rulePtr = ruleIterator.next()) { |
2373 | if (isUnreachable) { |
2374 | break; |
2375 | } |
2376 | |
2377 | const auto &rule2 = *rulePtr; |
2378 | if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) { |
2379 | continue; |
2380 | } |
2381 | |
2382 | const bool isSensitive = (rule2.insensitive == XmlBool::True); |
2383 | const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; |
2384 | if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(s: rule2.string, cs: caseSensitivity)) { |
2385 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2386 | } |
2387 | } |
2388 | } |
2389 | |
2390 | // string used for comparison and truncated from "dynamic" part |
2391 | QStringView s = rule.string; |
2392 | |
2393 | // truncate to '%' with dynamic rules |
2394 | if (rule.dynamic == XmlBool::True) { |
2395 | static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)" )); |
2396 | auto result = dynamicPosition.match(subject: rule.string); |
2397 | s = s.sliced(pos: 0, n: result.capturedLength()); |
2398 | // check if hidden by DetectChar/AnyChar |
2399 | if (s.size() + 2 <= rule.string.size()) { |
2400 | auto tables = CharTableArray(dynamicDetectChars, rule); |
2401 | updateUnreachable1(tables.find(c: s.data()[s.size() + 2])); |
2402 | } |
2403 | } |
2404 | |
2405 | QString sanitizedRegex; |
2406 | // truncate to special character with RegExpr. |
2407 | // If regexp contains '|', `s` becomes empty. |
2408 | if (rule.type == Context::Rule::Type::RegExpr) { |
2409 | static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)" )); |
2410 | static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])" )); |
2411 | const qsizetype result = regularChars.match(subject: rule.string).capturedLength(); |
2412 | const qsizetype pos = qMin(a: result, b: s.size()); |
2413 | if (rule.string.indexOf(ch: u'|', from: pos) < pos) { |
2414 | sanitizedRegex = rule.string.sliced(pos: 0, n: qMin(a: result, b: s.size())); |
2415 | sanitizedRegex.replace(re: sanitizeChars, QStringLiteral("\\1" )); |
2416 | s = sanitizedRegex; |
2417 | } else { |
2418 | s = QStringView(); |
2419 | } |
2420 | } |
2421 | |
2422 | // check if hidden by DetectChar/AnyChar |
2423 | if (s.size() > 0) { |
2424 | auto t = CharTableArray(detectChars, rule); |
2425 | if (rule.insensitive != XmlBool::True) { |
2426 | updateUnreachable1(t.find(c: s[0])); |
2427 | } else { |
2428 | QChar c2[]{s[0].toLower(), s[0].toUpper()}; |
2429 | updateUnreachable2(t.find(s: QStringView(c2, 2))); |
2430 | } |
2431 | |
2432 | // StringDetect is a DetectChar |
2433 | if (rule.type == Context::Rule::Type::StringDetect && rule.string.size() == 1) { |
2434 | auto tables = CharTableArray(detectChars, rule); |
2435 | auto c = rule.string[0]; |
2436 | if (rule.insensitive != XmlBool::True) { |
2437 | c = c.toLower(); |
2438 | tables.removeNonSpecialWhenSpecial(); |
2439 | tables.append(c, rule); |
2440 | c = c.toUpper(); |
2441 | } |
2442 | tables.removeNonSpecialWhenSpecial(); |
2443 | tables.append(c, rule); |
2444 | } |
2445 | } |
2446 | |
2447 | // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s |
2448 | if (s.size() > 0 && !isUnreachable) { |
2449 | // combination of uppercase and lowercase |
2450 | RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}}; |
2451 | |
2452 | RuleIterator ruleIterator(observedRules, observedRule); |
2453 | while (const auto *rulePtr = ruleIterator.next()) { |
2454 | if (isUnreachable) { |
2455 | break; |
2456 | } |
2457 | const auto &rule2 = *rulePtr; |
2458 | const bool isSensitive = (rule2.insensitive == XmlBool::True); |
2459 | const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; |
2460 | |
2461 | switch (rule2.type) { |
2462 | // check that it is not a detectChars prefix |
2463 | case Context::Rule::Type::Detect2Chars: |
2464 | if (isCompatible(rule2) && s.size() >= 2) { |
2465 | if (rule.insensitive != XmlBool::True) { |
2466 | if (rule2.char0 == s[0] && rule2.char1 == s[1]) { |
2467 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2468 | } |
2469 | } else { |
2470 | // when the string is case insensitive, |
2471 | // all 4 upper/lower case combinations must be found |
2472 | auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) { |
2473 | if (!x && rule2.char0 == c1 && rule2.char0 == c2) { |
2474 | x = {.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}; |
2475 | } |
2476 | }; |
2477 | set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower()); |
2478 | set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper()); |
2479 | set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper()); |
2480 | set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower()); |
2481 | |
2482 | if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2] |
2483 | && detect2CharsInsensitives[3]) { |
2484 | isUnreachable = true; |
2485 | unreachableBy.append(t: detect2CharsInsensitives[0]); |
2486 | unreachableBy.append(t: detect2CharsInsensitives[1]); |
2487 | unreachableBy.append(t: detect2CharsInsensitives[2]); |
2488 | unreachableBy.append(t: detect2CharsInsensitives[3]); |
2489 | } |
2490 | } |
2491 | } |
2492 | break; |
2493 | |
2494 | // check that it is not a StringDetect prefix |
2495 | case Context::Rule::Type::StringDetect: |
2496 | if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True) |
2497 | && s.startsWith(s: rule2.string, cs: caseSensitivity)) { |
2498 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2499 | } |
2500 | break; |
2501 | |
2502 | // check if a WordDetect is hidden by another WordDetect |
2503 | case Context::Rule::Type::WordDetect: |
2504 | if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True) |
2505 | && 0 == rule.string.compare(s: rule2.string, cs: caseSensitivity)) { |
2506 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2507 | } |
2508 | break; |
2509 | |
2510 | default:; |
2511 | } |
2512 | } |
2513 | } |
2514 | |
2515 | break; |
2516 | } |
2517 | |
2518 | // check if hidden by another keyword rule |
2519 | case Context::Rule::Type::keyword: { |
2520 | RuleIterator ruleIterator(observedRules, observedRule); |
2521 | while (const auto *rulePtr = ruleIterator.next()) { |
2522 | if (isUnreachable) { |
2523 | break; |
2524 | } |
2525 | const auto &rule2 = *rulePtr; |
2526 | if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) { |
2527 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2528 | } |
2529 | } |
2530 | // TODO check that all keywords are hidden by another rules |
2531 | break; |
2532 | } |
2533 | |
2534 | // add characters in those used but without checking if they are already. |
2535 | // <DetectChar char="}" /> |
2536 | // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked |
2537 | // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added |
2538 | // <DetectChar char="{" /> <- hidden by previous rule |
2539 | case Context::Rule::Type::IncludeRules: |
2540 | if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { |
2541 | break; |
2542 | } |
2543 | |
2544 | if (auto &ruleAndInclude = includeContexts[rule.context.context]) { |
2545 | updateUnreachable1(ruleAndInclude); |
2546 | } else { |
2547 | ruleAndInclude.rule = &rule; |
2548 | } |
2549 | |
2550 | for (const auto *rulePtr : rule.includedIncludeRules) { |
2551 | includeContexts.insert(key: rulePtr->context.context, value: RuleAndInclude{.rule: rulePtr, .includeRules: &rule}); |
2552 | } |
2553 | |
2554 | if (observedRule.includeRules) { |
2555 | break; |
2556 | } |
2557 | |
2558 | for (const auto *rulePtr : rule.includedRules) { |
2559 | const auto &rule2 = *rulePtr; |
2560 | switch (rule2.type) { |
2561 | case Context::Rule::Type::AnyChar: { |
2562 | auto tables = CharTableArray(detectChars, rule2); |
2563 | tables.removeNonSpecialWhenSpecial(); |
2564 | tables.append(s: rule2.string, rule: rule2, includeRule: &rule); |
2565 | break; |
2566 | } |
2567 | |
2568 | case Context::Rule::Type::DetectChar: { |
2569 | auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; |
2570 | auto tables = CharTableArray(chars4, rule2); |
2571 | tables.removeNonSpecialWhenSpecial(); |
2572 | tables.append(c: rule2.char0, rule: rule2, includeRule: &rule); |
2573 | break; |
2574 | } |
2575 | |
2576 | case Context::Rule::Type::DetectSpaces: { |
2577 | auto tables = CharTableArray(detectChars, rule2); |
2578 | tables.removeNonSpecialWhenSpecial(); |
2579 | tables.append(c: u' ', rule: rule2, includeRule: &rule); |
2580 | tables.append(c: u'\t', rule: rule2, includeRule: &rule); |
2581 | break; |
2582 | } |
2583 | |
2584 | case Context::Rule::Type::HlCChar: |
2585 | hlCCharRule.setRule(rule: rule2, includeRules: &rule); |
2586 | break; |
2587 | |
2588 | case Context::Rule::Type::HlCHex: |
2589 | hlCHexRule.setRule(rule: rule2, includeRules: &rule); |
2590 | break; |
2591 | |
2592 | case Context::Rule::Type::HlCOct: |
2593 | hlCOctRule.setRule(rule: rule2, includeRules: &rule); |
2594 | break; |
2595 | |
2596 | case Context::Rule::Type::HlCStringChar: |
2597 | hlCStringCharRule.setRule(rule: rule2, includeRules: &rule); |
2598 | break; |
2599 | |
2600 | case Context::Rule::Type::Int: |
2601 | intRule.setRule(rule: rule2, includeRules: &rule); |
2602 | break; |
2603 | |
2604 | case Context::Rule::Type::Float: |
2605 | floatRule.setRule(rule: rule2, includeRules: &rule); |
2606 | break; |
2607 | |
2608 | case Context::Rule::Type::LineContinue: { |
2609 | auto tables = CharTableArray(lineContinueChars, rule2); |
2610 | tables.removeNonSpecialWhenSpecial(); |
2611 | tables.append(c: rule2.char0, rule: rule2, includeRule: &rule); |
2612 | break; |
2613 | } |
2614 | |
2615 | case Context::Rule::Type::RegExpr: |
2616 | if (rule2.isDotRegex) { |
2617 | dotRegex.append(rule: rule2, includedRule: &rule); |
2618 | } |
2619 | break; |
2620 | |
2621 | case Context::Rule::Type::StringDetect: { |
2622 | // StringDetect is a DetectChar |
2623 | if (rule2.string.size() == 1 || (rule2.string.size() == 2 && rule2.dynamic == XmlBool::True)) { |
2624 | auto &chars4 = (rule2.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; |
2625 | auto tables = CharTableArray(chars4, rule2); |
2626 | tables.removeNonSpecialWhenSpecial(); |
2627 | tables.append(c: rule2.string.back(), rule: rule2, includeRule: &rule); |
2628 | } |
2629 | break; |
2630 | } |
2631 | |
2632 | case Context::Rule::Type::WordDetect: |
2633 | case Context::Rule::Type::Detect2Chars: |
2634 | case Context::Rule::Type::IncludeRules: |
2635 | case Context::Rule::Type::DetectIdentifier: |
2636 | case Context::Rule::Type::keyword: |
2637 | case Context::Rule::Type::Unknown: |
2638 | case Context::Rule::Type::RangeDetect: |
2639 | break; |
2640 | } |
2641 | } |
2642 | break; |
2643 | |
2644 | case Context::Rule::Type::Unknown: |
2645 | break; |
2646 | } |
2647 | |
2648 | if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { |
2649 | auto &unreachableIncludedRule = unreachableIncludedRules[&rule]; |
2650 | if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) { |
2651 | unreachableIncludedRule.unreachableBy.append(l: unreachableBy); |
2652 | } else { |
2653 | unreachableIncludedRule.alwaysUnreachable = false; |
2654 | } |
2655 | } else if (isUnreachable) { |
2656 | success = false; |
2657 | QString message; |
2658 | message.reserve(asize: 128); |
2659 | for (auto &ruleAndInclude : std::as_const(t&: unreachableBy)) { |
2660 | message += u"line "_sv ; |
2661 | if (ruleAndInclude.includeRules) { |
2662 | message += QString::number(ruleAndInclude.includeRules->line); |
2663 | message += u" [by '"_sv ; |
2664 | message += ruleAndInclude.includeRules->context.name; |
2665 | message += u"' line "_sv ; |
2666 | message += QString::number(ruleAndInclude.rule->line); |
2667 | if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) { |
2668 | message += u" ("_sv ; |
2669 | message += ruleAndInclude.rule->filename; |
2670 | message += u')'; |
2671 | } |
2672 | message += u']'; |
2673 | } else { |
2674 | message += QString::number(ruleAndInclude.rule->line); |
2675 | } |
2676 | message += u", "_sv ; |
2677 | } |
2678 | message.chop(n: 2); |
2679 | qWarning() << filename << "line" << rule.line << "unreachable rule by" << message; |
2680 | } |
2681 | } |
2682 | |
2683 | return success; |
2684 | } |
2685 | |
2686 | //! Proposes to merge certain rule sequences |
2687 | //! - several DetectChar/AnyChar into AnyChar |
2688 | //! - several RegExpr into one RegExpr |
2689 | bool suggestRuleMerger(const QString &filename, const Context &context) const |
2690 | { |
2691 | bool success = true; |
2692 | |
2693 | if (context.rules.isEmpty()) { |
2694 | return success; |
2695 | } |
2696 | |
2697 | auto it = context.rules.begin(); |
2698 | const auto end = context.rules.end() - 1; |
2699 | |
2700 | for (; it < end; ++it) { |
2701 | const auto &rule1 = *it; |
2702 | const auto &rule2 = it[1]; |
2703 | |
2704 | auto isCommonCompatible = [&] { |
2705 | if (rule1.lookAhead != rule2.lookAhead) { |
2706 | return false; |
2707 | } |
2708 | // ignore attribute when lookAhead is true |
2709 | if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) { |
2710 | return false; |
2711 | } |
2712 | // clang-format off |
2713 | return rule1.beginRegion == rule2.beginRegion |
2714 | && rule1.endRegion == rule2.endRegion |
2715 | && rule1.firstNonSpace == rule2.firstNonSpace |
2716 | && rule1.context.context == rule2.context.context |
2717 | && rule1.context.popCount == rule2.context.popCount; |
2718 | // clang-format on |
2719 | }; |
2720 | |
2721 | switch (rule1.type) { |
2722 | // request to merge StringDetect with AnyChar |
2723 | case Context::Rule::Type::StringDetect: |
2724 | if (rule1.string.size() != 1 || rule1.dynamic == XmlBool::True) { |
2725 | break; |
2726 | } |
2727 | Q_FALLTHROUGH(); |
2728 | // request to merge AnyChar/DetectChar |
2729 | case Context::Rule::Type::AnyChar: |
2730 | case Context::Rule::Type::DetectChar: |
2731 | if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar |
2732 | || (rule2.type == Context::Rule::Type::StringDetect && rule2.dynamic != XmlBool::True && rule2.string.size() == 1)) |
2733 | && isCommonCompatible() && rule1.column == rule2.column) { |
2734 | qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule" ; |
2735 | success = false; |
2736 | } |
2737 | break; |
2738 | |
2739 | // request to merge multiple RegExpr |
2740 | case Context::Rule::Type::RegExpr: |
2741 | if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic |
2742 | && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) { |
2743 | qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule" ; |
2744 | success = false; |
2745 | } |
2746 | break; |
2747 | |
2748 | case Context::Rule::Type::DetectSpaces: |
2749 | case Context::Rule::Type::HlCChar: |
2750 | case Context::Rule::Type::HlCHex: |
2751 | case Context::Rule::Type::HlCOct: |
2752 | case Context::Rule::Type::HlCStringChar: |
2753 | case Context::Rule::Type::Int: |
2754 | case Context::Rule::Type::Float: |
2755 | case Context::Rule::Type::LineContinue: |
2756 | case Context::Rule::Type::WordDetect: |
2757 | case Context::Rule::Type::Detect2Chars: |
2758 | case Context::Rule::Type::IncludeRules: |
2759 | case Context::Rule::Type::DetectIdentifier: |
2760 | case Context::Rule::Type::keyword: |
2761 | case Context::Rule::Type::Unknown: |
2762 | case Context::Rule::Type::RangeDetect: |
2763 | break; |
2764 | } |
2765 | } |
2766 | |
2767 | return success; |
2768 | } |
2769 | |
2770 | //! Initialize the referenced context (ContextName::context) |
2771 | //! Some input / output examples are: |
2772 | //! - "#stay" -> "" |
2773 | //! - "#pop" -> "" |
2774 | //! - "Comment" -> "Comment" |
2775 | //! - "#pop!Comment" -> "Comment" |
2776 | //! - "##ISO C++" -> "" |
2777 | //! - "Comment##ISO C++"-> "Comment" in ISO C++ |
2778 | void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line) |
2779 | { |
2780 | QStringView name = contextName.name; |
2781 | if (name.isEmpty()) { |
2782 | contextName.stay = true; |
2783 | } else if (name.startsWith(s: u"#stay"_sv )) { |
2784 | contextName.stay = true; |
2785 | if (name.size() > 5) { |
2786 | qWarning() << definition.filename << "line" << line << "invalid context in" << context.name; |
2787 | m_success = false; |
2788 | } |
2789 | } else { |
2790 | while (name.startsWith(s: u"#pop"_sv )) { |
2791 | name = name.sliced(pos: 4); |
2792 | ++contextName.popCount; |
2793 | } |
2794 | |
2795 | if (contextName.popCount && !name.isEmpty()) { |
2796 | if (name.startsWith(c: u'!') && name.size() > 1) { |
2797 | name = name.sliced(pos: 1); |
2798 | } else { |
2799 | qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name; |
2800 | m_success = false; |
2801 | } |
2802 | } |
2803 | |
2804 | if (!name.isEmpty()) { |
2805 | const int idx = name.indexOf(s: u"##"_sv ); |
2806 | if (idx == -1) { |
2807 | auto it = definition.contexts.find(key: name.toString()); |
2808 | if (it != definition.contexts.end()) { |
2809 | contextName.context = &*it; |
2810 | } |
2811 | } else { |
2812 | auto defName = name.sliced(pos: idx + 2); |
2813 | auto it = m_definitions.find(key: defName.toString()); |
2814 | if (it != m_definitions.end()) { |
2815 | auto listName = name.sliced(pos: 0, n: idx).toString(); |
2816 | definition.referencedDefinitions.insert(value: &*it); |
2817 | auto ctxIt = it->contexts.find(key: listName.isEmpty() ? it->firstContextName : listName); |
2818 | if (ctxIt != it->contexts.end()) { |
2819 | contextName.context = &*ctxIt; |
2820 | } |
2821 | } else { |
2822 | qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name; |
2823 | m_success = false; |
2824 | } |
2825 | } |
2826 | |
2827 | if (!contextName.context) { |
2828 | qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name; |
2829 | m_success = false; |
2830 | } |
2831 | } |
2832 | } |
2833 | } |
2834 | |
2835 | QMap<QString, Definition> m_definitions; |
2836 | QHash<QString, QString> m_names; |
2837 | Definition *m_currentDefinition = nullptr; |
2838 | Keywords *m_currentKeywords = nullptr; |
2839 | Context *m_currentContext = nullptr; |
2840 | // xml reader variable |
2841 | //@{ |
2842 | QString m_textContent; |
2843 | bool m_inKeywordItem = false; |
2844 | //@} |
2845 | bool m_success = true; |
2846 | }; |
2847 | |
2848 | class HlCompressor |
2849 | { |
2850 | public: |
2851 | HlCompressor(const QString &kateVersion) |
2852 | : m_kateVersion(kateVersion) |
2853 | { |
2854 | m_hasElems.push_back(t: true); |
2855 | } |
2856 | |
2857 | const QString &compressedXML() const |
2858 | { |
2859 | return m_data; |
2860 | } |
2861 | |
2862 | /** |
2863 | * Reduce xml space by removing what is superfluous. |
2864 | * - transforms boolean values into 0 or 1. |
2865 | * - remove unused attributes. |
2866 | * - remove spaces and comments. |
2867 | * - remove context attributes referring to #stay (because this is the default). |
2868 | * - replace Detect2Chars with StringDetect (String="xy" is shorter than char="x" char1="y"). |
2869 | * - sort contexts by frequency of use to accelerate their search during loading. |
2870 | */ |
2871 | void processElement(const QXmlStreamReader &xml) |
2872 | { |
2873 | switch (xml.tokenType()) { |
2874 | case QXmlStreamReader::StartElement: { |
2875 | closePreviousOpenTag(out&: m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data); |
2876 | m_hasElems.push_back(t: false); |
2877 | |
2878 | const auto tagName = xml.name(); |
2879 | if (tagName == u"contexts"_sv ) { |
2880 | m_inContexts = true; |
2881 | m_data += u"<contexts"_sv ; |
2882 | } else if (m_inContexts) { |
2883 | Context &ctx = (m_contexts.empty() || tagName == u"context"_sv ) ? m_contexts.emplace_back() : m_contexts.back(); |
2884 | QString &out = ctx.data; |
2885 | const bool isDetect2Chars = tagName == u"Detect2Chars"_sv ; |
2886 | out += u'<' % (isDetect2Chars ? u"StringDetect"_sv : tagName); |
2887 | |
2888 | auto attrs = xml.attributes(); |
2889 | sortAttributes(attrs); |
2890 | for (const auto &attr : attrs) { |
2891 | const auto attrName = attr.name(); |
2892 | auto value = attr.value(); |
2893 | // transform Detect2Chars char and char1 attributes to StringDetect String attribute |
2894 | if (isDetect2Chars && (attrName == u"char"_sv || attrName == u"char1"_sv )) { |
2895 | if (attrName == u"char"_sv ) { |
2896 | const auto ch0 = value; |
2897 | const auto ch1 = attrs.value(qualifiedName: u"char1"_sv ); |
2898 | QChar chars[]{ch0.isEmpty() ? u' ' : ch0[0], ch1.isEmpty() ? u' ' : ch1[0]}; |
2899 | writeXmlAttribute(out, attrName: u"String"_sv , value: QStringView(chars, 2), tagName); |
2900 | } |
2901 | } else if (attrName == u"context"_sv || attrName == u"lineEndContext"_sv || attrName == u"fallthroughContext"_sv |
2902 | || attrName == u"lineEmptyContext"_sv ) { |
2903 | // ignore #stay context because this is the default |
2904 | if (value != u"#stay"_sv ) { |
2905 | writeXmlAttribute(out, attrName, value, tagName); |
2906 | |
2907 | /* |
2908 | * Extract context name and increment context counter |
2909 | */ |
2910 | bool hasPop = false; |
2911 | while (value.startsWith(s: u"#pop"_sv )) { |
2912 | hasPop = true; |
2913 | value = value.sliced(pos: 4); |
2914 | } |
2915 | if (hasPop && !value.isEmpty()) { |
2916 | value = value.sliced(pos: 1); |
2917 | } |
2918 | if (!value.isEmpty() && -1 == value.indexOf(s: u"##"_sv )) { |
2919 | m_contextRefs[value.toString()]++; |
2920 | } |
2921 | } |
2922 | } else if (tagName == u"LineContinue"_sv && attrName == u"char"_sv && value == u"\\" ) { |
2923 | // ignore char="\\" with LineContinue |
2924 | } else { |
2925 | if (attrName == u"name"_sv ) { |
2926 | ctx.name = value.toString(); |
2927 | } |
2928 | writeXmlAttribute(out, attrName, value, tagName); |
2929 | } |
2930 | } |
2931 | } else if (m_inList) { |
2932 | m_inItem = true; |
2933 | m_isIncludeItem = (tagName == u"include"_sv ); |
2934 | } else { |
2935 | if (tagName == u"list"_sv ) { |
2936 | m_keywords.clear(); |
2937 | m_inList = true; |
2938 | } |
2939 | m_data += u'<' % tagName; |
2940 | const auto attrs = xml.attributes(); |
2941 | for (const auto &attr : attrs) { |
2942 | auto name = attr.name(); |
2943 | auto value = (name == u"kateversion" ) ? QStringView(m_kateVersion) : attr.value(); |
2944 | writeXmlAttribute(out&: m_data, attrName: name, value, tagName); |
2945 | } |
2946 | } |
2947 | break; |
2948 | } |
2949 | |
2950 | case QXmlStreamReader::EndElement: { |
2951 | const auto tagName = xml.name(); |
2952 | if (m_inItem) { |
2953 | m_inItem = false; |
2954 | m_hasElems.pop_back(); |
2955 | break; |
2956 | } else if (m_inList) { |
2957 | m_inList = false; |
2958 | std::sort(first: m_keywords.begin(), last: m_keywords.end()); |
2959 | m_keywords.erase(first: std::unique(first: m_keywords.begin(), last: m_keywords.end()), last: m_keywords.end()); |
2960 | for (const auto &item : m_keywords) { |
2961 | m_data += item.isIncludeTag ? u"<include>"_sv : u"<item>"_sv ; |
2962 | writeXmlText(out&: m_data, text: item.text); |
2963 | m_data += item.isIncludeTag ? u"</include>"_sv : u"</item>"_sv ; |
2964 | } |
2965 | } else if (m_inContexts && !m_contexts.empty() && tagName == u"contexts"_sv ) { |
2966 | m_inContexts = false; |
2967 | // sorting contexts by the most used (ignore first context) |
2968 | std::sort(first: m_contexts.begin() + 1, last: m_contexts.end(), comp: [&](auto &ctx1, auto &ctx2) { |
2969 | auto i1 = m_contextRefs.value(ctx1.name); |
2970 | auto i2 = m_contextRefs.value(ctx2.name); |
2971 | if (i1 != i2) { |
2972 | return i1 > i2; |
2973 | } |
2974 | // for a reproducible build, contexts with the same number of uses are sorted by name |
2975 | return ctx1.name < ctx2.name; |
2976 | }); |
2977 | for (const auto &ctx : m_contexts) { |
2978 | m_data += ctx.data; |
2979 | } |
2980 | } |
2981 | |
2982 | QString &out = m_inContexts && !m_contexts.empty() ? m_contexts.back().data : m_data; |
2983 | if (m_hasElems.back()) { |
2984 | out += u"</"_sv % tagName % u'>'; |
2985 | } else { |
2986 | out += u"/>"_sv ; |
2987 | } |
2988 | m_hasElems.pop_back(); |
2989 | break; |
2990 | } |
2991 | |
2992 | case QXmlStreamReader::EntityReference: |
2993 | case QXmlStreamReader::Characters: |
2994 | if (m_inItem) { |
2995 | m_keywords.push_back(x: {.text: xml.text().toString(), .isIncludeTag: m_isIncludeItem}); |
2996 | } |
2997 | break; |
2998 | |
2999 | default:; |
3000 | } |
3001 | } |
3002 | |
3003 | private: |
3004 | void closePreviousOpenTag(QString &out) |
3005 | { |
3006 | if (!m_hasElems.back()) { |
3007 | m_hasElems.back() = true; |
3008 | out += u'>'; |
3009 | } |
3010 | } |
3011 | |
3012 | /** |
3013 | * Write \p text escaping special characters. |
3014 | */ |
3015 | static void writeXmlText(QString &out, QStringView text, bool escapeDQ = false) |
3016 | { |
3017 | for (const QChar &c : text) { |
3018 | if (c == u'<') { |
3019 | out += u"<"_sv ; |
3020 | } else if (c == u'&') { |
3021 | out += u"&"_sv ; |
3022 | } else if (escapeDQ && c == u'"') { |
3023 | out += u"""_sv ; |
3024 | } else if (c == u'\t') { |
3025 | // non-space whitespace character in an attribute is remplaced with space... |
3026 | out += u"	"_sv ; |
3027 | } else { |
3028 | out += c; |
3029 | } |
3030 | } |
3031 | } |
3032 | |
3033 | /** |
3034 | * Write attribut in \p out. |
3035 | * Booleans are converted to 0, 1 or ignored if this corresponds to the default value. |
3036 | * Values will be written with either double quotes or single quotes, |
3037 | * depending on which takes up the least space |
3038 | */ |
3039 | static void writeXmlAttribute(QString &out, QStringView attrName, QStringView value, QStringView tagName) |
3040 | { |
3041 | enum class DefaultBool { |
3042 | // default value is false |
3043 | False, |
3044 | // default value is true |
3045 | True, |
3046 | // manipulate as a tribool whose attribute absence is equivalent to None |
3047 | None, |
3048 | // not used |
3049 | Ignored, |
3050 | // default value is false, but None for <keyword> |
3051 | FalseOrKeywordTag, |
3052 | // default value is true, but depends on another value for <keywords> |
3053 | TrueOrKeywordsTag, |
3054 | // default is false, but ignored in <context> |
3055 | DynamicAttr, |
3056 | }; |
3057 | static const QHash<QStringView, DefaultBool> booleanAttrs({ |
3058 | {u"fallthrough"_sv , DefaultBool::Ignored}, |
3059 | {u"dynamic"_sv , DefaultBool::DynamicAttr}, |
3060 | {u"hidden"_sv , DefaultBool::False}, |
3061 | {u"indentationsensitive"_sv , DefaultBool::False}, |
3062 | {u"noIndentationBasedFolding"_sv , DefaultBool::False}, |
3063 | {u"lookAhead"_sv , DefaultBool::False}, |
3064 | {u"firstNonSpace"_sv , DefaultBool::False}, |
3065 | {u"insensitive"_sv , DefaultBool::FalseOrKeywordTag}, |
3066 | {u"minimal"_sv , DefaultBool::False}, |
3067 | {u"includeAttrib"_sv , DefaultBool::False}, |
3068 | {u"italic"_sv , DefaultBool::None}, |
3069 | {u"bold"_sv , DefaultBool::None}, |
3070 | {u"underline"_sv , DefaultBool::None}, |
3071 | {u"strikeOut"_sv , DefaultBool::None}, |
3072 | {u"spellChecking"_sv , DefaultBool::True}, |
3073 | {u"casesensitive"_sv , DefaultBool::TrueOrKeywordsTag}, |
3074 | {u"ignored"_sv , DefaultBool::Ignored}, |
3075 | }); |
3076 | |
3077 | auto it = booleanAttrs.find(key: attrName); |
3078 | // convert boolean value |
3079 | if (it != booleanAttrs.end()) { |
3080 | bool b = KSyntaxHighlighting::Xml::attrToBool(str: value); |
3081 | bool ignoreAttr = false; |
3082 | switch (*it) { |
3083 | case DefaultBool::Ignored: |
3084 | ignoreAttr = true; |
3085 | break; |
3086 | case DefaultBool::TrueOrKeywordsTag: |
3087 | ignoreAttr = (tagName == u"keywords"_sv ) ? false : b; |
3088 | break; |
3089 | case DefaultBool::True: |
3090 | ignoreAttr = b; |
3091 | break; |
3092 | case DefaultBool::FalseOrKeywordTag: |
3093 | ignoreAttr = (tagName == u"keyword"_sv ) ? false : !b; |
3094 | break; |
3095 | case DefaultBool::DynamicAttr: |
3096 | ignoreAttr = (tagName == u"context"_sv ) || !b; |
3097 | break; |
3098 | case DefaultBool::False: |
3099 | ignoreAttr = !b; |
3100 | break; |
3101 | case DefaultBool::None: |
3102 | ignoreAttr = false; |
3103 | break; |
3104 | } |
3105 | if (!ignoreAttr) { |
3106 | out += u' ' % attrName % u"=\""_sv % (b ? u'1' : u'0') % u'"'; |
3107 | } |
3108 | } else { |
3109 | const bool hasDQ = value.contains(c: u'"'); |
3110 | // attribute in double quotes when the value does not contain " or contains " and ' |
3111 | if (!hasDQ || value.contains(c: u'\'')) { |
3112 | out += u' ' % attrName % u"=\""_sv ; |
3113 | writeXmlText(out, text: value, escapeDQ: hasDQ); |
3114 | out += u'"'; |
3115 | // attribute in single quotes because the value contains " |
3116 | } else { |
3117 | out += u' ' % attrName % u"='"_sv ; |
3118 | writeXmlText(out, text: value); |
3119 | out += u'\''; |
3120 | } |
3121 | } |
3122 | } |
3123 | |
3124 | /** |
3125 | * Sort attributes for better compression by rcc. |
3126 | */ |
3127 | static void sortAttributes(QXmlStreamAttributes &attrs) |
3128 | { |
3129 | static const QHash<QStringView, int> priorityAttrs({ |
3130 | // context and rule |
3131 | {u"attribute"_sv , 5}, |
3132 | |
3133 | // context and itemData |
3134 | {u"name"_sv , 4}, |
3135 | |
3136 | // context |
3137 | {u"noIndentationBasedFolding"_sv , 11}, |
3138 | {u"lineEndContext"_sv , 9}, |
3139 | {u"lineEmptyContext"_sv , 8}, |
3140 | {u"fallthroughContext"_sv , 7}, |
3141 | |
3142 | // rule |
3143 | {u"lookAhead"_sv , 100}, |
3144 | {u"firstNonSpace"_sv , 99}, |
3145 | {u"dynamic"_sv , 98}, |
3146 | {u"minimal"_sv , 97}, |
3147 | {u"includeAttrib"_sv , 96}, |
3148 | {u"insensitive"_sv , 95}, |
3149 | {u"column"_sv , 50}, |
3150 | {u"beginRegion"_sv , 40}, |
3151 | {u"endRegion"_sv , 41}, |
3152 | {u"weakDeliminator"_sv , 31}, |
3153 | {u"additionalDeliminator"_sv , 30}, |
3154 | {u"context"_sv , 20}, |
3155 | {u"String"_sv , 2}, |
3156 | {u"char"_sv , 2}, |
3157 | |
3158 | // itemData |
3159 | {u"strikeOut"_sv , 100}, |
3160 | {u"underline"_sv , 99}, |
3161 | {u"italic"_sv , 98}, |
3162 | {u"bold"_sv , 97}, |
3163 | {u"spellChecking"_sv , 96}, |
3164 | {u"defStyleNum"_sv , 95}, |
3165 | {u"color"_sv , 94}, |
3166 | {u"backgroundColor"_sv , 93}, |
3167 | {u"selBackgroundColor"_sv , 92}, |
3168 | {u"selColor"_sv , 91}, |
3169 | }); |
3170 | std::sort(first: attrs.begin(), last: attrs.end(), comp: [](auto &attr1, auto &attr2) { |
3171 | auto i1 = priorityAttrs.value(attr1.name()); |
3172 | auto i2 = priorityAttrs.value(attr2.name()); |
3173 | if (i1 != i2) { |
3174 | return i1 < i2; |
3175 | } |
3176 | return attr1.name() < attr2.name(); |
3177 | }); |
3178 | } |
3179 | |
3180 | struct Context { |
3181 | QString name; |
3182 | QString data; |
3183 | }; |
3184 | struct Item { |
3185 | QString text; |
3186 | bool isIncludeTag; |
3187 | |
3188 | std::strong_ordering operator<=>(const Item &other) const = default; |
3189 | }; |
3190 | QString m_data = u"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE language>"_s ; |
3191 | std::vector<Context> m_contexts; |
3192 | QHash<QString, int> m_contextRefs; |
3193 | std::vector<Item> m_keywords; |
3194 | QVarLengthArray<bool, 8> m_hasElems; |
3195 | QString m_kateVersion; |
3196 | bool m_inContexts = false; |
3197 | bool m_inList = false; |
3198 | bool m_inItem = false; |
3199 | bool m_isIncludeItem = false; |
3200 | }; |
3201 | |
3202 | void printFileError(const QFile &file) |
3203 | { |
3204 | qWarning() << "Failed to open" << file.fileName() << "-" << file.errorString(); |
3205 | } |
3206 | |
3207 | void printXmlError(const QString &fileName, const QXmlStreamReader &xml) |
3208 | { |
3209 | qWarning() << fileName << "-" << xml.errorString() << "@ offset" << xml.characterOffset(); |
3210 | }; |
3211 | |
3212 | QStringList readListing(const QString &fileName) |
3213 | { |
3214 | QFile file(fileName); |
3215 | if (!file.open(flags: QIODevice::ReadOnly)) { |
3216 | printFileError(file); |
3217 | return QStringList(); |
3218 | } |
3219 | |
3220 | QXmlStreamReader xml(&file); |
3221 | QStringList listing; |
3222 | while (!xml.atEnd()) { |
3223 | xml.readNext(); |
3224 | |
3225 | // add only .xml files, no .json or stuff |
3226 | if (xml.isCharacters() && xml.text().contains(s: QLatin1String(".xml" ))) { |
3227 | listing.append(t: xml.text().toString()); |
3228 | } |
3229 | } |
3230 | |
3231 | if (xml.hasError()) { |
3232 | printXmlError(fileName, xml); |
3233 | listing.clear(); |
3234 | } |
3235 | |
3236 | return listing; |
3237 | } |
3238 | |
3239 | /** |
3240 | * check if the "extensions" attribute have valid wildcards |
3241 | * @param extensions extensions string to check |
3242 | * @return valid? |
3243 | */ |
3244 | bool checkExtensions(QStringView extensions) |
3245 | { |
3246 | // get list of extensions |
3247 | const QList<QStringView> extensionParts = extensions.split(sep: u';', behavior: Qt::SkipEmptyParts); |
3248 | |
3249 | // ok if empty |
3250 | if (extensionParts.isEmpty()) { |
3251 | return true; |
3252 | } |
3253 | |
3254 | // check that only valid wildcard things are inside the parts |
3255 | for (const auto &extension : extensionParts) { |
3256 | for (const auto c : extension) { |
3257 | // eat normal things |
3258 | if (c.isDigit() || c.isLetter()) { |
3259 | continue; |
3260 | } |
3261 | |
3262 | // allow some special characters |
3263 | if (c == u'.' || c == u'-' || c == u'_' || c == u'+') { |
3264 | continue; |
3265 | } |
3266 | |
3267 | // only allowed wildcard things: '?' and '*' |
3268 | if (c == u'?' || c == u'*') { |
3269 | continue; |
3270 | } |
3271 | |
3272 | qWarning() << "invalid character" << c << "seen in extensions wildcard" ; |
3273 | return false; |
3274 | } |
3275 | } |
3276 | |
3277 | // all checks passed |
3278 | return true; |
3279 | } |
3280 | |
/**
 * Pairs a file name with XML data held as text.
 * NOTE(review): presumably filled with HlCompressor::compressedXML() for each
 * definition file; the code using it is outside this view, confirm.
 */
struct CompressedFile {
    // name of the file the data belongs to (assumed from the field name, confirm)
    QString fileName;
    // XML content as a string
    QString xmlData;
};
3285 | |
3286 | } |
3287 | |
3288 | int main(int argc, char *argv[]) |
3289 | { |
3290 | // get app instance |
3291 | QCoreApplication app(argc, argv); |
3292 | |
3293 | // ensure enough arguments are passed |
3294 | if (app.arguments().size() < 4) { |
3295 | return 1; |
3296 | } |
3297 | |
3298 | #ifdef HAS_XERCESC |
3299 | // care for proper init and cleanup |
3300 | XMLPlatformUtils::Initialize(); |
3301 | auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate); |
3302 | |
3303 | /* |
3304 | * parse XSD first time and cache it |
3305 | */ |
3306 | XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager); |
3307 | |
3308 | // create parser for the XSD |
3309 | CustomXMLValidator parser(&xsd); |
3310 | |
3311 | // load grammar into the pool, on error just abort |
3312 | const auto xsdFile = app.arguments().at(2); |
3313 | if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || parser.eh.failed()) { |
3314 | qWarning("Failed to parse XSD %s: %s" , qPrintable(xsdFile), qPrintable(parser.messages)); |
3315 | return 2; |
3316 | } |
3317 | |
3318 | // lock the pool, no later modifications wanted! |
3319 | xsd.lockPool(); |
3320 | #endif |
3321 | |
3322 | const QString hlFilenamesListing = app.arguments().value(i: 3); |
3323 | if (hlFilenamesListing.isEmpty()) { |
3324 | return 1; |
3325 | } |
3326 | |
3327 | QStringList hlFilenames = readListing(fileName: hlFilenamesListing); |
3328 | if (hlFilenames.isEmpty()) { |
3329 | qWarning(msg: "Failed to read %s" , qPrintable(hlFilenamesListing)); |
3330 | return 3; |
3331 | } |
3332 | |
3333 | // text attributes |
3334 | const QStringList textAttributes = QStringList() << QStringLiteral("name" ) << QStringLiteral("alternativeNames" ) << QStringLiteral("section" ) |
3335 | << QStringLiteral("mimetype" ) << QStringLiteral("extensions" ) << QStringLiteral("style" ) |
3336 | << QStringLiteral("author" ) << QStringLiteral("license" ) << QStringLiteral("indenter" ); |
3337 | |
3338 | // index all given highlightings |
3339 | HlFilesChecker filesChecker; |
3340 | QVariantMap hls; |
3341 | int anyError = 0; |
3342 | std::vector<CompressedFile> compressedFiles; |
3343 | for (const QString &hlFilename : std::as_const(t&: hlFilenames)) { |
3344 | QFile hlFile(hlFilename); |
3345 | if (!hlFile.open(flags: QIODevice::ReadOnly)) { |
3346 | printFileError(file: hlFile); |
3347 | anyError = 3; |
3348 | continue; |
3349 | } |
3350 | |
3351 | #ifdef HAS_XERCESC |
3352 | // create parser |
3353 | CustomXMLValidator parser(&xsd); |
3354 | |
3355 | // parse the XML file |
3356 | parser.parse((const char16_t *)hlFile.fileName().utf16()); |
3357 | |
3358 | // report issues |
3359 | if (parser.eh.failed()) { |
3360 | qWarning("Failed to validate XML %s: %s" , qPrintable(hlFile.fileName()), qPrintable(parser.messages)); |
3361 | anyError = 4; |
3362 | continue; |
3363 | } |
3364 | #endif |
3365 | |
3366 | // read the needed attributes from toplevel language tag |
3367 | hlFile.reset(); |
3368 | QXmlStreamReader xml(&hlFile); |
3369 | if (xml.readNextStartElement()) { |
3370 | if (xml.name() != QLatin1String("language" )) { |
3371 | anyError = 5; |
3372 | continue; |
3373 | } |
3374 | } else { |
3375 | anyError = 6; |
3376 | continue; |
3377 | } |
3378 | |
3379 | // map to store hl info |
3380 | QVariantMap hl; |
3381 | |
3382 | // transfer text attributes |
3383 | for (const QString &attribute : std::as_const(t: textAttributes)) { |
3384 | hl[attribute] = xml.attributes().value(qualifiedName: attribute).toString(); |
3385 | } |
3386 | |
3387 | // check if extensions have the right format |
3388 | if (!checkExtensions(extensions: hl[QStringLiteral("extensions" )].toString())) { |
3389 | qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions" )].toString(); |
3390 | anyError = 23; |
3391 | } |
3392 | |
3393 | // numerical attributes |
3394 | hl[QStringLiteral("version" )] = xml.attributes().value(qualifiedName: QLatin1String("version" )).toInt(); |
3395 | hl[QStringLiteral("priority" )] = xml.attributes().value(qualifiedName: QLatin1String("priority" )).toInt(); |
3396 | |
3397 | // add boolean one |
3398 | hl[QStringLiteral("hidden" )] = attrToBool(str: xml.attributes().value(qualifiedName: QLatin1String("hidden" ))); |
3399 | |
3400 | // keep some strings as UTF-8 for faster translations |
3401 | hl[QStringLiteral("nameUtf8" )] = hl[QStringLiteral("name" )].toString().toUtf8(); |
3402 | hl[QStringLiteral("sectionUtf8" )] = hl[QStringLiteral("section" )].toString().toUtf8(); |
3403 | |
3404 | // remember hl |
3405 | hls[QFileInfo(hlFile).fileName()] = hl; |
3406 | |
3407 | const QStringView kateversion = xml.attributes().value(QStringLiteral("kateversion" )); |
3408 | const QString hlName = hl[QStringLiteral("name" )].toString(); |
3409 | const QString hlAlternativeNames = hl[QStringLiteral("alternativeNames" )].toString(); |
3410 | |
3411 | filesChecker.setDefinition(verStr: kateversion, filename: hlFilename, name: hlName, alternativeNames: hlAlternativeNames.split(sep: u';', behavior: Qt::SkipEmptyParts)); |
3412 | |
3413 | // As the compressor removes "fallthrough" attribute which is required with |
3414 | // "fallthroughContext" before the 5.62 version, the minimum version is |
3415 | // automatically increased |
3416 | HlCompressor compressor((filesChecker.currentVersion() < KateVersion{5, 62}) ? u"5.62"_s : kateversion.toString()); |
3417 | compressor.processElement(xml); |
3418 | |
3419 | // scan for broken regex or keywords with spaces |
3420 | while (!xml.atEnd()) { |
3421 | xml.readNext(); |
3422 | filesChecker.processElement(xml); |
3423 | compressor.processElement(xml); |
3424 | } |
3425 | |
3426 | if (xml.hasError()) { |
3427 | anyError = 33; |
3428 | printXmlError(fileName: hlFilename, xml); |
3429 | } |
3430 | |
3431 | compressedFiles.emplace_back(args: CompressedFile{ |
3432 | .fileName: QFileInfo(hlFilename).fileName(), |
3433 | .xmlData: compressor.compressedXML(), |
3434 | }); |
3435 | } |
3436 | |
3437 | filesChecker.resolveContexts(); |
3438 | |
3439 | if (!filesChecker.check()) { |
3440 | anyError = 7; |
3441 | } |
3442 | |
3443 | // bail out if any problem was seen |
3444 | if (anyError) { |
3445 | return anyError; |
3446 | } |
3447 | |
3448 | // check compressed file |
3449 | HlFilesChecker filesChecker2; |
3450 | const QString compressedDir = app.arguments().at(i: 4) + u"/"_sv ; |
3451 | for (const auto &compressedFile : std::as_const(t&: compressedFiles)) { |
3452 | const auto outFileName = compressedDir + compressedFile.fileName; |
3453 | auto utf8Data = compressedFile.xmlData.toUtf8(); |
3454 | |
3455 | #ifdef HAS_XERCESC |
3456 | // create parser |
3457 | CustomXMLValidator parser(&xsd); |
3458 | |
3459 | auto utf8Filename = outFileName.toUtf8(); |
3460 | utf8Filename.append('\0'); |
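3461 | // wrap the in-memory compressed XML as an input source -- NOTE(review): membuf is never handed to parser.parse() here, so the failure check below may see no validation results; confirm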
3462 | MemBufInputSource membuf(reinterpret_cast<const XMLByte *>(utf8Data.constData()), utf8Data.size(), utf8Filename.data()); |
3463 | |
3464 | // report issues |
3465 | if (parser.eh.failed()) { |
3466 | qWarning("Failed to validate XML %s: %s" , qPrintable(outFileName), qPrintable(parser.messages)); |
3467 | return 8; |
3468 | } |
3469 | #endif |
3470 | |
3471 | QBuffer buffer(&utf8Data); |
3472 | buffer.open(openMode: QBuffer::ReadOnly); |
3473 | QXmlStreamReader xml(&buffer); |
3474 | // scan for broken file |
3475 | while (!xml.atEnd()) { |
3476 | if (xml.readNext() == QXmlStreamReader::TokenType::StartElement && xml.name() == u"language"_sv ) { |
3477 | const auto attrs = xml.attributes(); |
3478 | const auto version = attrs.value(qualifiedName: u"kateversion"_sv ); |
3479 | const QString hlName = attrs.value(qualifiedName: u"name"_sv ).toString(); |
3480 | const QString hlAlternativeNames = attrs.value(qualifiedName: u"alternativeNames"_sv ).toString(); |
3481 | filesChecker2.setDefinition(verStr: version, filename: outFileName, name: hlName, alternativeNames: hlAlternativeNames.split(sep: u';', behavior: Qt::SkipEmptyParts)); |
3482 | } |
3483 | filesChecker2.processElement(xml); |
3484 | } |
3485 | |
3486 | if (xml.hasError()) { |
3487 | printXmlError(fileName: outFileName, xml); |
3488 | return 9; |
3489 | } |
3490 | |
3491 | // create outfile, after all has worked! |
3492 | QFile outFile(outFileName); |
3493 | if (!outFile.open(flags: QIODevice::WriteOnly | QIODevice::Truncate)) { |
3494 | return 10; |
3495 | } |
3496 | outFile.write(data: utf8Data); |
3497 | } |
3498 | |
3499 | filesChecker2.resolveContexts(); |
3500 | |
3501 | // bail out if any problem was seen |
3502 | if (!filesChecker2.check()) { |
3503 | return 11; |
3504 | } |
3505 | |
3506 | // create outfile, after all has worked! |
3507 | QFile outFile(app.arguments().at(i: 1)); |
3508 | if (!outFile.open(flags: QIODevice::WriteOnly | QIODevice::Truncate)) { |
3509 | return 12; |
3510 | } |
3511 | |
3512 | // write out the collected highlighting index as CBOR
3513 | outFile.write(data: QCborValue::fromVariant(variant: QVariant(hls)).toCbor()); |
3514 | |
3515 | // be done |
3516 | return 0; |
3517 | } |
3518 | |