1 | /* |
2 | SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org> |
3 | SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com> |
4 | |
5 | SPDX-License-Identifier: MIT |
6 | */ |
7 | |
8 | #include <QCborValue> |
9 | #include <QCoreApplication> |
10 | #include <QDebug> |
11 | #include <QFile> |
12 | #include <QFileInfo> |
13 | #include <QMutableMapIterator> |
14 | #include <QRegularExpression> |
15 | #include <QScopeGuard> |
16 | #include <QVariant> |
17 | #include <QXmlStreamReader> |
18 | |
19 | #ifdef HAS_XERCESC |
20 | |
21 | #include <xercesc/framework/XMLGrammarPoolImpl.hpp> |
22 | |
23 | #include <xercesc/parsers/SAX2XMLReaderImpl.hpp> |
24 | |
25 | #include <xercesc/sax/ErrorHandler.hpp> |
26 | #include <xercesc/sax/SAXParseException.hpp> |
27 | |
28 | #include <xercesc/util/PlatformUtils.hpp> |
29 | #include <xercesc/util/XMLString.hpp> |
30 | #include <xercesc/util/XMLUni.hpp> |
31 | |
32 | #include <xercesc/framework/XMLGrammarPoolImpl.hpp> |
33 | #include <xercesc/validators/common/Grammar.hpp> |
34 | |
35 | using namespace xercesc; |
36 | |
37 | /* |
38 | * Ideas taken from: |
39 | * |
40 | * author : Boris Kolpackov <boris@codesynthesis.com> |
41 | * copyright : not copyrighted - public domain |
42 | * |
43 | * This program uses Xerces-C++ SAX2 parser to load a set of schema files |
44 | * and then to validate a set of XML documents against these schemas. To |
45 | * build this program you will need Xerces-C++ 3.0.0 or later. For more |
46 | * information, see: |
47 | * |
48 | * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/ |
49 | */ |
50 | |
51 | /** |
52 | * Error handler object used during xml schema validation. |
53 | */ |
54 | class CustomErrorHandler : public ErrorHandler |
55 | { |
56 | public: |
57 | /** |
58 | * Constructor |
59 | * @param messages Pointer to the error message string to fill. |
60 | */ |
61 | CustomErrorHandler(QString *messages) |
62 | : m_messages(messages) |
63 | { |
64 | } |
65 | |
66 | /** |
67 | * Check global success/fail state. |
68 | * @return True if there was a failure, false otherwise. |
69 | */ |
70 | bool failed() const |
71 | { |
72 | return m_failed; |
73 | } |
74 | |
75 | private: |
76 | /** |
77 | * Severity classes for error messages. |
78 | */ |
79 | enum severity { s_warning, s_error, s_fatal }; |
80 | |
81 | /** |
82 | * Wrapper for warning exceptions. |
83 | * @param e Exception to handle. |
84 | */ |
85 | void warning(const SAXParseException &e) override |
86 | { |
87 | m_failed = true; // be strict, warnings are evil, too! |
88 | handle(e, s_warning); |
89 | } |
90 | |
91 | /** |
92 | * Wrapper for error exceptions. |
93 | * @param e Exception to handle. |
94 | */ |
95 | void error(const SAXParseException &e) override |
96 | { |
97 | m_failed = true; |
98 | handle(e, s_error); |
99 | } |
100 | |
101 | /** |
102 | * Wrapper for fatal error exceptions. |
103 | * @param e Exception to handle. |
104 | */ |
105 | void fatalError(const SAXParseException &e) override |
106 | { |
107 | m_failed = true; |
108 | handle(e, s_fatal); |
109 | } |
110 | |
111 | /** |
112 | * Reset the error status to "no error". |
113 | */ |
114 | void resetErrors() override |
115 | { |
116 | m_failed = false; |
117 | } |
118 | |
119 | /** |
120 | * Generic handler for error/warning/fatal error message exceptions. |
121 | * @param e Exception to handle. |
122 | * @param s Enum value encoding the message severtity. |
123 | */ |
124 | void handle(const SAXParseException &e, severity s) |
125 | { |
126 | // get id to print |
127 | const XMLCh *xid(e.getPublicId()); |
128 | if (!xid) |
129 | xid = e.getSystemId(); |
130 | |
131 | m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: " ) |
132 | << QString::fromUtf16(e.getMessage()) << Qt::endl; |
133 | } |
134 | |
135 | private: |
136 | /** |
137 | * Storage for created error messages in this handler. |
138 | */ |
139 | QTextStream m_messages; |
140 | |
141 | /** |
142 | * Global error state. True if there was an error, false otherwise. |
143 | */ |
144 | bool m_failed = false; |
145 | }; |
146 | |
147 | void init_parser(SAX2XMLReaderImpl &parser) |
148 | { |
149 | // Commonly useful configuration. |
150 | // |
151 | parser.setFeature(XMLUni::fgSAX2CoreNameSpaces, true); |
152 | parser.setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true); |
153 | parser.setFeature(XMLUni::fgSAX2CoreValidation, true); |
154 | |
155 | // Enable validation. |
156 | // |
157 | parser.setFeature(XMLUni::fgXercesSchema, true); |
158 | parser.setFeature(XMLUni::fgXercesSchemaFullChecking, true); |
159 | parser.setFeature(XMLUni::fgXercesValidationErrorAsFatal, true); |
160 | |
161 | // Use the loaded grammar during parsing. |
162 | // |
163 | parser.setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true); |
164 | |
165 | // Don't load schemas from any other source (e.g., from XML document's |
166 | // xsi:schemaLocation attributes). |
167 | // |
168 | parser.setFeature(XMLUni::fgXercesLoadSchema, false); |
169 | |
170 | // Xerces-C++ 3.1.0 is the first version with working multi import |
171 | // support. |
172 | // |
173 | parser.setFeature(XMLUni::fgXercesHandleMultipleImports, true); |
174 | } |
175 | |
176 | #endif |
177 | |
178 | #include "../lib/worddelimiters_p.h" |
179 | #include "../lib/xml_p.h" |
180 | |
181 | #include <array> |
182 | |
183 | using KSyntaxHighlighting::WordDelimiters; |
184 | using KSyntaxHighlighting::Xml::attrToBool; |
185 | |
186 | class HlFilesChecker |
187 | { |
188 | public: |
189 | template<typename T> |
190 | void setDefinition(const T &verStr, const QString &filename, const QString &name) |
191 | { |
192 | m_currentDefinition = &*m_definitions.insert(key: name, value: Definition{}); |
193 | m_currentDefinition->languageName = name; |
194 | m_currentDefinition->filename = filename; |
195 | m_currentDefinition->kateVersionStr = verStr.toString(); |
196 | m_currentKeywords = nullptr; |
197 | m_currentContext = nullptr; |
198 | |
199 | const auto idx = verStr.indexOf(QLatin1Char('.')); |
200 | if (idx <= 0) { |
201 | qWarning() << filename << "invalid kateversion" << verStr; |
202 | m_success = false; |
203 | } else { |
204 | m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()}; |
205 | } |
206 | } |
207 | |
208 | void processElement(QXmlStreamReader &xml) |
209 | { |
210 | if (xml.isStartElement()) { |
211 | if (m_currentContext) { |
212 | m_currentContext->rules.push_back(t: Context::Rule{}); |
213 | auto &rule = m_currentContext->rules.back(); |
214 | m_success = rule.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
215 | m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True; |
216 | } else if (m_currentKeywords) { |
217 | m_success = m_currentKeywords->items.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
218 | } else if (xml.name() == QStringLiteral("context" )) { |
219 | processContextElement(xml); |
220 | } else if (xml.name() == QStringLiteral("list" )) { |
221 | processListElement(xml); |
222 | } else if (xml.name() == QStringLiteral("keywords" )) { |
223 | m_success = m_currentDefinition->parseKeywords(xml) && m_success; |
224 | } else if (xml.name() == QStringLiteral("emptyLine" )) { |
225 | m_success = parseEmptyLine(filename: m_currentDefinition->filename, xml) && m_success; |
226 | } else if (xml.name() == QStringLiteral("itemData" )) { |
227 | m_success = m_currentDefinition->itemDatas.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
228 | } |
229 | } else if (xml.isEndElement()) { |
230 | if (m_currentContext && xml.name() == QStringLiteral("context" )) { |
231 | m_currentContext = nullptr; |
232 | } else if (m_currentKeywords && xml.name() == QStringLiteral("list" )) { |
233 | m_currentKeywords = nullptr; |
234 | } |
235 | } |
236 | } |
237 | |
238 | //! Resolve context attribute and include tag |
239 | void resolveContexts() |
240 | { |
241 | QMutableMapIterator<QString, Definition> def(m_definitions); |
242 | while (def.hasNext()) { |
243 | def.next(); |
244 | auto &definition = def.value(); |
245 | auto &contexts = definition.contexts; |
246 | |
247 | if (contexts.isEmpty()) { |
248 | qWarning() << definition.filename << "has no context" ; |
249 | m_success = false; |
250 | continue; |
251 | } |
252 | |
253 | auto markAsUsedContext = [](ContextName &contextName) { |
254 | if (!contextName.stay && contextName.context) { |
255 | contextName.context->isOnlyIncluded = false; |
256 | } |
257 | }; |
258 | |
259 | QMutableMapIterator<QString, Context> contextIt(contexts); |
260 | while (contextIt.hasNext()) { |
261 | contextIt.next(); |
262 | auto &context = contextIt.value(); |
263 | resolveContextName(definition, context, contextName&: context.lineEndContext, line: context.line); |
264 | resolveContextName(definition, context, contextName&: context.lineEmptyContext, line: context.line); |
265 | resolveContextName(definition, context, contextName&: context.fallthroughContext, line: context.line); |
266 | markAsUsedContext(context.lineEndContext); |
267 | markAsUsedContext(context.lineEmptyContext); |
268 | markAsUsedContext(context.fallthroughContext); |
269 | for (auto &rule : context.rules) { |
270 | rule.parentContext = &context; |
271 | resolveContextName(definition, context, contextName&: rule.context, line: rule.line); |
272 | if (rule.type != Context::Rule::Type::IncludeRules) { |
273 | markAsUsedContext(rule.context); |
274 | } else if (rule.includeAttrib == XmlBool::True && rule.context.context) { |
275 | rule.context.context->referencedWithIncludeAttrib = true; |
276 | } |
277 | } |
278 | } |
279 | |
280 | auto *firstContext = &*definition.contexts.find(key: definition.firstContextName); |
281 | firstContext->isOnlyIncluded = false; |
282 | definition.firstContext = firstContext; |
283 | } |
284 | |
285 | resolveIncludeRules(); |
286 | } |
287 | |
288 | bool check() const |
289 | { |
290 | bool success = m_success; |
291 | |
292 | const auto usedContexts = extractUsedContexts(); |
293 | |
294 | QMap<const Definition *, const Definition *> maxVersionByDefinitions; |
295 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules; |
296 | |
297 | QMapIterator<QString, Definition> def(m_definitions); |
298 | while (def.hasNext()) { |
299 | def.next(); |
300 | const auto &definition = def.value(); |
301 | const auto &filename = definition.filename; |
302 | |
303 | auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions); |
304 | if (maxDef != &definition) { |
305 | qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr |
306 | << ". Please, increase kateversion." ; |
307 | success = false; |
308 | } |
309 | |
310 | QSet<ItemDatas::Style> usedAttributeNames; |
311 | QSet<ItemDatas::Style> ignoredAttributeNames; |
312 | success = checkKeywordsList(definition) && success; |
313 | success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success; |
314 | |
315 | // search for non-existing itemDatas. |
316 | const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames; |
317 | for (const auto &styleName : invalidNames) { |
318 | qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name; |
319 | success = false; |
320 | } |
321 | |
322 | // search for existing itemDatas, but unusable. |
323 | const auto ignoredNames = ignoredAttributeNames - usedAttributeNames; |
324 | for (const auto &styleName : ignoredNames) { |
325 | qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name |
326 | << "is never used. All uses are with lookAhead=true or <IncludeRules/>" ; |
327 | success = false; |
328 | } |
329 | |
330 | // search for unused itemDatas. |
331 | auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames; |
332 | unusedNames -= ignoredNames; |
333 | for (const auto &styleName : std::as_const(t&: unusedNames)) { |
334 | qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name; |
335 | success = false; |
336 | } |
337 | } |
338 | |
339 | QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules); |
340 | while (unreachableIncludedRuleIt.hasNext()) { |
341 | unreachableIncludedRuleIt.next(); |
342 | IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value(); |
343 | if (unreachableRulesBy.alwaysUnreachable) { |
344 | auto *rule = unreachableIncludedRuleIt.key(); |
345 | |
346 | if (!rule->parentContext->isOnlyIncluded) { |
347 | continue; |
348 | } |
349 | |
350 | // remove duplicates rules |
351 | QSet<const Context::Rule *> rules; |
352 | auto &unreachableBy = unreachableRulesBy.unreachableBy; |
353 | unreachableBy.erase(abegin: std::remove_if(first: unreachableBy.begin(), |
354 | last: unreachableBy.end(), |
355 | pred: [&](const RuleAndInclude &ruleAndInclude) { |
356 | if (rules.contains(value: ruleAndInclude.rule)) { |
357 | return true; |
358 | } |
359 | rules.insert(value: ruleAndInclude.rule); |
360 | return false; |
361 | }), |
362 | aend: unreachableBy.end()); |
363 | |
364 | QString message; |
365 | message.reserve(asize: 128); |
366 | for (auto &ruleAndInclude : std::as_const(t&: unreachableBy)) { |
367 | message += QStringLiteral("line " ); |
368 | message += QString::number(ruleAndInclude.rule->line); |
369 | message += QStringLiteral(" [" ); |
370 | message += ruleAndInclude.rule->parentContext->name; |
371 | if (rule->filename != ruleAndInclude.rule->filename) { |
372 | message += QStringLiteral(" (" ); |
373 | message += ruleAndInclude.rule->filename; |
374 | message += QLatin1Char(')'); |
375 | } |
376 | if (ruleAndInclude.includeRules) { |
377 | message += QStringLiteral(" via line " ); |
378 | message += QString::number(ruleAndInclude.includeRules->line); |
379 | } |
380 | message += QStringLiteral("], " ); |
381 | } |
382 | message.chop(n: 2); |
383 | |
384 | qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message; |
385 | success = false; |
386 | } |
387 | } |
388 | |
389 | return success; |
390 | } |
391 | |
392 | private: |
//! Tri-state value of a boolean XML attribute.
//! A value-initialized XmlBool (\c XmlBool{}) is \c Unspecified.
enum class XmlBool {
    Unspecified = 0,
    False,
    True,
};
398 | |
399 | struct Context; |
400 | |
401 | struct ContextName { |
402 | QString name; |
403 | int popCount = 0; |
404 | bool stay = false; |
405 | |
406 | Context *context = nullptr; |
407 | }; |
408 | |
409 | struct Parser { |
410 | const QString &filename; |
411 | QXmlStreamReader &xml; |
412 | QXmlStreamAttribute &attr; |
413 | bool success; |
414 | |
415 | //! Read a string type attribute, \c success = \c false when \p str is not empty |
416 | //! \return \c true when attr.name() == attrName, otherwise false |
417 | bool (QString &str, const QString &attrName) |
418 | { |
419 | if (attr.name() != attrName) { |
420 | return false; |
421 | } |
422 | |
423 | str = attr.value().toString(); |
424 | if (str.isEmpty()) { |
425 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty" ; |
426 | success = false; |
427 | } |
428 | |
429 | return true; |
430 | } |
431 | |
432 | //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified. |
433 | //! \return \c true when attr.name() == attrName, otherwise false |
434 | bool (XmlBool &xmlBool, const QString &attrName) |
435 | { |
436 | if (attr.name() != attrName) { |
437 | return false; |
438 | } |
439 | |
440 | xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(str: attr.value()) ? XmlBool::True : XmlBool::False; |
441 | |
442 | return true; |
443 | } |
444 | |
445 | //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0 |
446 | //! \return \c true when attr.name() == attrName, otherwise false |
447 | bool (int &positive, const QString &attrName) |
448 | { |
449 | if (attr.name() != attrName) { |
450 | return false; |
451 | } |
452 | |
453 | bool ok = true; |
454 | positive = attr.value().toInt(ok: &ok); |
455 | |
456 | if (!ok || positive < 0) { |
457 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value(); |
458 | success = false; |
459 | } |
460 | |
461 | return true; |
462 | } |
463 | |
464 | //! Read a color, \c success = \c false when \p color is already greater than or equal to 0 |
465 | //! \return \c true when attr.name() == attrName, otherwise false |
466 | bool checkColor(const QString &attrName) |
467 | { |
468 | if (attr.name() != attrName) { |
469 | return false; |
470 | } |
471 | |
472 | const auto value = attr.value(); |
473 | if (value.isEmpty() /*|| QColor(value).isValid()*/) { |
474 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value; |
475 | success = false; |
476 | } |
477 | |
478 | return true; |
479 | } |
480 | |
481 | //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char |
482 | //! \return \c true when attr.name() == attrName, otherwise false |
483 | bool (QChar &c, const QString &attrName) |
484 | { |
485 | if (attr.name() != attrName) { |
486 | return false; |
487 | } |
488 | |
489 | if (attr.value().size() == 1) { |
490 | c = attr.value()[0]; |
491 | } else { |
492 | c = QLatin1Char('_'); |
493 | qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value(); |
494 | success = false; |
495 | } |
496 | |
497 | return true; |
498 | } |
499 | |
500 | //! \return parsing status when \p isExtracted is \c true, otherwise \c false |
501 | bool (bool ) |
502 | { |
503 | if (isExtracted) { |
504 | return success; |
505 | } |
506 | |
507 | qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name(); |
508 | return false; |
509 | } |
510 | }; |
511 | |
512 | struct Keywords { |
513 | struct Items { |
514 | struct Item { |
515 | QString content; |
516 | int line; |
517 | |
518 | friend size_t qHash(const Item &item, size_t seed = 0) |
519 | { |
520 | return qHash(key: item.content, seed); |
521 | } |
522 | |
523 | friend bool operator==(const Item &item0, const Item &item1) |
524 | { |
525 | return item0.content == item1.content; |
526 | } |
527 | }; |
528 | |
529 | QList<Item> keywords; |
530 | QSet<Item> includes; |
531 | |
532 | bool parseElement(const QString &filename, QXmlStreamReader &xml) |
533 | { |
534 | bool success = true; |
535 | |
536 | const int line = xml.lineNumber(); |
537 | QString content = xml.readElementText(); |
538 | |
539 | if (content.isEmpty()) { |
540 | qWarning() << filename << "line" << line << "is empty:" << xml.name(); |
541 | success = false; |
542 | } |
543 | |
544 | if (xml.name() == QStringLiteral("include" )) { |
545 | includes.insert(value: {.content: content, .line: line}); |
546 | } else if (xml.name() == QStringLiteral("item" )) { |
547 | keywords.append(t: {.content: content, .line: line}); |
548 | } else { |
549 | qWarning() << filename << "line" << line << "invalid element:" << xml.name(); |
550 | success = false; |
551 | } |
552 | |
553 | return success; |
554 | } |
555 | }; |
556 | |
557 | QString name; |
558 | Items items; |
559 | int line; |
560 | |
561 | bool parseElement(const QString &filename, QXmlStreamReader &xml) |
562 | { |
563 | line = xml.lineNumber(); |
564 | |
565 | bool success = true; |
566 | for (auto &attr : xml.attributes()) { |
567 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
568 | |
569 | const bool = parser.extractString(str&: name, QStringLiteral("name" )); |
570 | |
571 | success = parser.checkIfExtracted(isExtracted); |
572 | } |
573 | return success; |
574 | } |
575 | }; |
576 | |
577 | struct Context { |
578 | struct Rule { |
//! Kind of a rule. \c Unknown is the value-initialized state; every other
//! enumerator is spelled like the XML element it is parsed from.
enum class Type {
    Unknown = 0,
    AnyChar,
    Detect2Chars,
    DetectChar,
    DetectIdentifier,
    DetectSpaces,
    Float,
    HlCChar,
    HlCHex,
    HlCOct,
    HlCStringChar,
    IncludeRules,
    Int,
    LineContinue,
    RangeDetect,
    RegExpr,
    StringDetect,
    WordDetect,
    keyword,
};
600 | |
601 | Type type{}; |
602 | |
603 | bool isDotRegex = false; |
604 | int line = -1; |
605 | |
606 | // commonAttributes |
607 | QString attribute; |
608 | ContextName context; |
609 | QString beginRegion; |
610 | QString endRegion; |
611 | int column = -1; |
612 | XmlBool lookAhead{}; |
613 | XmlBool firstNonSpace{}; |
614 | |
615 | // StringDetect, WordDetect, keyword |
616 | XmlBool insensitive{}; |
617 | |
618 | // DetectChar, StringDetect, RegExpr, keyword |
619 | XmlBool dynamic{}; |
620 | |
621 | // Regex |
622 | XmlBool minimal{}; |
623 | |
624 | // IncludeRule |
625 | XmlBool includeAttrib{}; |
626 | |
627 | // DetectChar, Detect2Chars, LineContinue, RangeDetect |
628 | QChar char0; |
629 | // Detect2Chars, RangeDetect |
630 | QChar char1; |
631 | |
632 | // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword |
633 | QString string; |
634 | // RegExpr without .* as suffix |
635 | QString sanitizedString; |
636 | |
637 | // Float, HlCHex, HlCOct, Int, WordDetect, keyword |
638 | QString additionalDeliminator; |
639 | QString weakDeliminator; |
640 | |
641 | // rules included by IncludeRules (without IncludeRule) |
642 | QList<const Rule *> includedRules; |
643 | |
644 | // IncludeRules included by IncludeRules |
645 | QSet<const Rule *> includedIncludeRules; |
646 | |
647 | Context const *parentContext = nullptr; |
648 | |
649 | QString filename; |
650 | |
651 | bool parseElement(const QString &filename, QXmlStreamReader &xml) |
652 | { |
653 | this->filename = filename; |
654 | line = xml.lineNumber(); |
655 | |
656 | using Pair = QPair<QString, Type>; |
657 | static const auto pairs = { |
658 | Pair{QStringLiteral("AnyChar" ), Type::AnyChar}, |
659 | Pair{QStringLiteral("Detect2Chars" ), Type::Detect2Chars}, |
660 | Pair{QStringLiteral("DetectChar" ), Type::DetectChar}, |
661 | Pair{QStringLiteral("DetectIdentifier" ), Type::DetectIdentifier}, |
662 | Pair{QStringLiteral("DetectSpaces" ), Type::DetectSpaces}, |
663 | Pair{QStringLiteral("Float" ), Type::Float}, |
664 | Pair{QStringLiteral("HlCChar" ), Type::HlCChar}, |
665 | Pair{QStringLiteral("HlCHex" ), Type::HlCHex}, |
666 | Pair{QStringLiteral("HlCOct" ), Type::HlCOct}, |
667 | Pair{QStringLiteral("HlCStringChar" ), Type::HlCStringChar}, |
668 | Pair{QStringLiteral("IncludeRules" ), Type::IncludeRules}, |
669 | Pair{QStringLiteral("Int" ), Type::Int}, |
670 | Pair{QStringLiteral("LineContinue" ), Type::LineContinue}, |
671 | Pair{QStringLiteral("RangeDetect" ), Type::RangeDetect}, |
672 | Pair{QStringLiteral("RegExpr" ), Type::RegExpr}, |
673 | Pair{QStringLiteral("StringDetect" ), Type::StringDetect}, |
674 | Pair{QStringLiteral("WordDetect" ), Type::WordDetect}, |
675 | Pair{QStringLiteral("keyword" ), Type::keyword}, |
676 | }; |
677 | |
678 | for (auto pair : pairs) { |
679 | if (xml.name() == pair.first) { |
680 | type = pair.second; |
681 | bool success = parseAttributes(filename, xml); |
682 | success = checkMandoryAttributes(filename, xml) && success; |
683 | if (success && type == Type::RegExpr) { |
684 | // ., (.) followed by *, +, {1} or nothing |
685 | static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)" )); |
686 | // remove "(?:" and ")" |
687 | static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))" )); |
688 | // remove parentheses on a copy of string |
689 | auto reg = QString(string).replace(re: removeParentheses, after: QString()); |
690 | isDotRegex = reg.contains(re: isDot); |
691 | |
692 | // Remove .* and .*$ suffix. |
693 | static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$" )); |
694 | sanitizedString = string; |
695 | sanitizedString.replace(re: allSuffix, after: QString()); |
696 | // string is a catch-all, do not sanitize |
697 | if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^" )) { |
698 | sanitizedString = string; |
699 | } |
700 | } |
701 | return success; |
702 | } |
703 | } |
704 | |
705 | qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name(); |
706 | return false; |
707 | } |
708 | |
709 | private: |
710 | bool parseAttributes(const QString &filename, QXmlStreamReader &xml) |
711 | { |
712 | bool success = true; |
713 | |
714 | for (auto &attr : xml.attributes()) { |
715 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
716 | |
717 | // clang-format off |
718 | const bool |
719 | = parser.extractString(str&: attribute, QStringLiteral("attribute" )) |
720 | || parser.extractString(str&: context.name, QStringLiteral("context" )) |
721 | || parser.extractXmlBool(xmlBool&: lookAhead, QStringLiteral("lookAhead" )) |
722 | || parser.extractXmlBool(xmlBool&: firstNonSpace, QStringLiteral("firstNonSpace" )) |
723 | || parser.extractString(str&: beginRegion, QStringLiteral("beginRegion" )) |
724 | || parser.extractString(str&: endRegion, QStringLiteral("endRegion" )) |
725 | || parser.extractPositive(positive&: column, QStringLiteral("column" )) |
726 | || ((type == Type::RegExpr |
727 | || type == Type::StringDetect |
728 | || type == Type::WordDetect |
729 | || type == Type::keyword |
730 | ) && parser.extractXmlBool(xmlBool&: insensitive, QStringLiteral("insensitive" ))) |
731 | || ((type == Type::DetectChar |
732 | || type == Type::RegExpr |
733 | || type == Type::StringDetect |
734 | || type == Type::keyword |
735 | ) && parser.extractXmlBool(xmlBool&: dynamic, QStringLiteral("dynamic" ))) |
736 | || ((type == Type::RegExpr) |
737 | && parser.extractXmlBool(xmlBool&: minimal, QStringLiteral("minimal" ))) |
738 | || ((type == Type::DetectChar |
739 | || type == Type::Detect2Chars |
740 | || type == Type::LineContinue |
741 | || type == Type::RangeDetect |
742 | ) && parser.extractChar(c&: char0, QStringLiteral("char" ))) |
743 | || ((type == Type::Detect2Chars |
744 | || type == Type::RangeDetect |
745 | ) && parser.extractChar(c&: char1, QStringLiteral("char1" ))) |
746 | || ((type == Type::AnyChar |
747 | || type == Type::RegExpr |
748 | || type == Type::StringDetect |
749 | || type == Type::WordDetect |
750 | || type == Type::keyword |
751 | ) && parser.extractString(str&: string, QStringLiteral("String" ))) |
752 | || ((type == Type::IncludeRules) |
753 | && parser.extractXmlBool(xmlBool&: includeAttrib, QStringLiteral("includeAttrib" ))) |
754 | || ((type == Type::Float |
755 | || type == Type::HlCHex |
756 | || type == Type::HlCOct |
757 | || type == Type::Int |
758 | || type == Type::keyword |
759 | || type == Type::WordDetect |
760 | ) && (parser.extractString(str&: additionalDeliminator, QStringLiteral("additionalDeliminator" )) |
761 | || parser.extractString(str&: weakDeliminator, QStringLiteral("weakDeliminator" )))) |
762 | ; |
763 | // clang-format on |
764 | |
765 | success = parser.checkIfExtracted(isExtracted); |
766 | |
767 | if (type == Type::LineContinue && char0 == QLatin1Char('\0')) { |
768 | char0 = QLatin1Char('\\'); |
769 | } |
770 | } |
771 | |
772 | return success; |
773 | } |
774 | |
775 | bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml) |
776 | { |
777 | QString missingAttr; |
778 | |
779 | switch (type) { |
780 | case Type::Unknown: |
781 | return false; |
782 | |
783 | case Type::AnyChar: |
784 | case Type::RegExpr: |
785 | case Type::StringDetect: |
786 | case Type::WordDetect: |
787 | case Type::keyword: |
788 | missingAttr = string.isEmpty() ? QStringLiteral("String" ) : QString(); |
789 | break; |
790 | |
791 | case Type::DetectChar: |
792 | missingAttr = !char0.unicode() ? QStringLiteral("char" ) : QString(); |
793 | break; |
794 | |
795 | case Type::Detect2Chars: |
796 | case Type::RangeDetect: |
797 | missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1" ) |
798 | : !char0.unicode() ? QStringLiteral("char" ) |
799 | : !char1.unicode() ? QStringLiteral("char1" ) |
800 | : QString(); |
801 | break; |
802 | |
803 | case Type::IncludeRules: |
804 | missingAttr = context.name.isEmpty() ? QStringLiteral("context" ) : QString(); |
805 | break; |
806 | |
807 | case Type::DetectIdentifier: |
808 | case Type::DetectSpaces: |
809 | case Type::Float: |
810 | case Type::HlCChar: |
811 | case Type::HlCHex: |
812 | case Type::HlCOct: |
813 | case Type::HlCStringChar: |
814 | case Type::Int: |
815 | case Type::LineContinue: |
816 | break; |
817 | } |
818 | |
819 | if (!missingAttr.isEmpty()) { |
820 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr; |
821 | return false; |
822 | } |
823 | |
824 | return true; |
825 | } |
826 | }; |
827 | |
828 | int line; |
829 | // becomes false when a context (except includeRule) refers to it |
830 | bool isOnlyIncluded = true; |
831 | // becomes true when an includedRule refers to it with includeAttrib=true |
832 | bool referencedWithIncludeAttrib = false; |
833 | bool hasDynamicRule = false; |
834 | QString name; |
835 | QString attribute; |
836 | ContextName lineEndContext; |
837 | ContextName lineEmptyContext; |
838 | ContextName fallthroughContext; |
839 | QList<Rule> rules; |
840 | XmlBool dynamic{}; |
841 | XmlBool fallthrough{}; |
842 | XmlBool stopEmptyLineContextSwitchLoop{}; |
843 | |
844 | bool parseElement(const QString &filename, QXmlStreamReader &xml) |
845 | { |
846 | line = xml.lineNumber(); |
847 | |
848 | bool success = true; |
849 | |
850 | for (auto &attr : xml.attributes()) { |
851 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
852 | XmlBool noIndentationBasedFolding{}; |
853 | |
854 | // clang-format off |
855 | const bool = parser.extractString(str&: name, QStringLiteral("name" )) |
856 | || parser.extractString(str&: attribute, QStringLiteral("attribute" )) |
857 | || parser.extractString(str&: lineEndContext.name, QStringLiteral("lineEndContext" )) |
858 | || parser.extractString(str&: lineEmptyContext.name, QStringLiteral("lineEmptyContext" )) |
859 | || parser.extractString(str&: fallthroughContext.name, QStringLiteral("fallthroughContext" )) |
860 | || parser.extractXmlBool(xmlBool&: dynamic, QStringLiteral("dynamic" )) |
861 | || parser.extractXmlBool(xmlBool&: fallthrough, QStringLiteral("fallthrough" )) |
862 | || parser.extractXmlBool(xmlBool&: stopEmptyLineContextSwitchLoop, QStringLiteral("stopEmptyLineContextSwitchLoop" )) |
863 | || parser.extractXmlBool(xmlBool&: noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding" )); |
864 | // clang-format on |
865 | |
866 | success = parser.checkIfExtracted(isExtracted); |
867 | } |
868 | |
869 | if (name.isEmpty()) { |
870 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name" ; |
871 | success = false; |
872 | } |
873 | |
874 | if (attribute.isEmpty()) { |
875 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute" ; |
876 | success = false; |
877 | } |
878 | |
879 | return success; |
880 | } |
881 | }; |
882 | |
//! A "major.minor" version number, ordered by major first, then minor.
struct Version {
    int majorRevision;
    int minorRevision;

    //! Both parts default to 0.
    Version(int majorRev = 0, int minorRev = 0)
        : majorRevision(majorRev)
        , minorRevision(minorRev)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const Version &version) const
    {
        if (majorRevision != version.majorRevision) {
            return majorRevision < version.majorRevision;
        }
        return minorRevision < version.minorRevision;
    }
};
898 | |
899 | struct ItemDatas { |
900 | struct Style { |
901 | QString name; |
902 | int line; |
903 | |
904 | friend size_t qHash(const Style &style, size_t seed = 0) |
905 | { |
906 | return qHash(key: style.name, seed); |
907 | } |
908 | |
909 | friend bool operator==(const Style &style0, const Style &style1) |
910 | { |
911 | return style0.name == style1.name; |
912 | } |
913 | }; |
914 | |
915 | QSet<Style> styleNames; |
916 | |
917 | bool parseElement(const QString &filename, QXmlStreamReader &xml) |
918 | { |
919 | bool success = true; |
920 | |
921 | QString name; |
922 | QString defStyleNum; |
923 | XmlBool boolean; |
924 | |
925 | for (auto &attr : xml.attributes()) { |
926 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
927 | |
928 | const bool = parser.extractString(str&: name, QStringLiteral("name" )) || parser.extractString(str&: defStyleNum, QStringLiteral("defStyleNum" )) |
929 | || parser.extractXmlBool(xmlBool&: boolean, QStringLiteral("bold" )) || parser.extractXmlBool(xmlBool&: boolean, QStringLiteral("italic" )) |
930 | || parser.extractXmlBool(xmlBool&: boolean, QStringLiteral("underline" )) || parser.extractXmlBool(xmlBool&: boolean, QStringLiteral("strikeOut" )) |
931 | || parser.extractXmlBool(xmlBool&: boolean, QStringLiteral("spellChecking" )) || parser.checkColor(QStringLiteral("color" )) |
932 | || parser.checkColor(QStringLiteral("selColor" )) || parser.checkColor(QStringLiteral("backgroundColor" )) |
933 | || parser.checkColor(QStringLiteral("selBackgroundColor" )); |
934 | |
935 | success = parser.checkIfExtracted(isExtracted); |
936 | } |
937 | |
938 | if (!name.isEmpty()) { |
939 | const auto len = styleNames.size(); |
940 | styleNames.insert(value: {.name: name, .line: int(xml.lineNumber())}); |
941 | if (len == styleNames.size()) { |
942 | qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name; |
943 | success = false; |
944 | } |
945 | } |
946 | |
947 | return success; |
948 | } |
949 | }; |
950 | |
951 | struct Definition { |
952 | QMap<QString, Keywords> keywordsList; |
953 | QMap<QString, Context> contexts; |
954 | ItemDatas itemDatas; |
955 | QString firstContextName; |
956 | const Context *firstContext = nullptr; |
957 | QString filename; |
958 | WordDelimiters wordDelimiters; |
959 | Version kateVersion{}; |
960 | QString kateVersionStr; |
961 | QString languageName; |
962 | QSet<const Definition *> referencedDefinitions; |
963 | |
964 | // Parse <keywords ...> |
965 | bool parseKeywords(QXmlStreamReader &xml) |
966 | { |
967 | wordDelimiters.append(s: xml.attributes().value(QStringLiteral("additionalDeliminator" ))); |
968 | wordDelimiters.remove(c: xml.attributes().value(QStringLiteral("weakDeliminator" ))); |
969 | return true; |
970 | } |
971 | }; |
972 | |
973 | // Parse <context> |
974 | void processContextElement(QXmlStreamReader &xml) |
975 | { |
976 | Context context; |
977 | m_success = context.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
978 | if (m_currentDefinition->firstContextName.isEmpty()) { |
979 | m_currentDefinition->firstContextName = context.name; |
980 | } |
981 | if (m_currentDefinition->contexts.contains(key: context.name)) { |
982 | qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name; |
983 | m_success = false; |
984 | } |
985 | m_currentContext = &*m_currentDefinition->contexts.insert(key: context.name, value: context); |
986 | } |
987 | |
988 | // Parse <list name="..."> |
989 | void processListElement(QXmlStreamReader &xml) |
990 | { |
991 | Keywords keywords; |
992 | m_success = keywords.parseElement(filename: m_currentDefinition->filename, xml) && m_success; |
993 | if (m_currentDefinition->keywordsList.contains(key: keywords.name)) { |
994 | qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name; |
995 | m_success = false; |
996 | } |
997 | m_currentKeywords = &*m_currentDefinition->keywordsList.insert(key: keywords.name, value: keywords); |
998 | } |
999 | |
1000 | const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const |
1001 | { |
1002 | auto it = maxVersionByDefinitions.find(key: &definition); |
1003 | if (it != maxVersionByDefinitions.end()) { |
1004 | return it.value(); |
1005 | } else { |
1006 | auto it = maxVersionByDefinitions.insert(key: &definition, value: &definition); |
1007 | for (const auto &referencedDef : definition.referencedDefinitions) { |
1008 | auto *maxDef = maxKateVersionDefinition(definition: *referencedDef, maxVersionByDefinitions); |
1009 | if (it.value()->kateVersion < maxDef->kateVersion) { |
1010 | it.value() = maxDef; |
1011 | } |
1012 | } |
1013 | return it.value(); |
1014 | } |
1015 | } |
1016 | |
1017 | // Initialize the referenced rules (Rule::includedRules) |
1018 | void resolveIncludeRules() |
1019 | { |
1020 | QSet<const Context *> usedContexts; |
1021 | QList<const Context *> contexts; |
1022 | |
1023 | QMutableMapIterator<QString, Definition> def(m_definitions); |
1024 | while (def.hasNext()) { |
1025 | def.next(); |
1026 | auto &definition = def.value(); |
1027 | QMutableMapIterator<QString, Context> contextIt(definition.contexts); |
1028 | while (contextIt.hasNext()) { |
1029 | contextIt.next(); |
1030 | auto ¤tContext = contextIt.value(); |
1031 | for (auto &rule : currentContext.rules) { |
1032 | if (rule.type != Context::Rule::Type::IncludeRules) { |
1033 | continue; |
1034 | } |
1035 | |
1036 | if (rule.context.stay) { |
1037 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself" ; |
1038 | m_success = false; |
1039 | continue; |
1040 | } |
1041 | |
1042 | if (rule.context.popCount) { |
1043 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix" ; |
1044 | m_success = false; |
1045 | } |
1046 | |
1047 | if (!rule.context.context) { |
1048 | m_success = false; |
1049 | continue; |
1050 | } |
1051 | |
1052 | // resolve includedRules and includedIncludeRules |
1053 | |
1054 | usedContexts.clear(); |
1055 | usedContexts.insert(value: rule.context.context); |
1056 | contexts.clear(); |
1057 | contexts.append(t: rule.context.context); |
1058 | |
1059 | for (int i = 0; i < contexts.size(); ++i) { |
1060 | currentContext.hasDynamicRule = contexts[i]->hasDynamicRule; |
1061 | for (const auto &includedRule : contexts[i]->rules) { |
1062 | if (includedRule.type != Context::Rule::Type::IncludeRules) { |
1063 | rule.includedRules.append(t: &includedRule); |
1064 | } else if (&rule == &includedRule) { |
1065 | qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity" ; |
1066 | m_success = false; |
1067 | } else { |
1068 | rule.includedIncludeRules.insert(value: &includedRule); |
1069 | |
1070 | if (includedRule.includedRules.isEmpty()) { |
1071 | const auto *context = includedRule.context.context; |
1072 | if (context && !usedContexts.contains(value: context)) { |
1073 | contexts.append(t: context); |
1074 | usedContexts.insert(value: context); |
1075 | } |
1076 | } else { |
1077 | rule.includedRules.append(l: includedRule.includedRules); |
1078 | } |
1079 | } |
1080 | } |
1081 | } |
1082 | } |
1083 | } |
1084 | } |
1085 | } |
1086 | |
1087 | //! Recursively extracts the contexts used from the first context of the definitions. |
1088 | //! This method detects groups of contexts which are only used among themselves. |
1089 | QSet<const Context *> () const |
1090 | { |
1091 | QSet<const Context *> usedContexts; |
1092 | QList<const Context *> contexts; |
1093 | |
1094 | QMapIterator<QString, Definition> def(m_definitions); |
1095 | while (def.hasNext()) { |
1096 | def.next(); |
1097 | const auto &definition = def.value(); |
1098 | |
1099 | if (definition.firstContext) { |
1100 | usedContexts.insert(value: definition.firstContext); |
1101 | contexts.clear(); |
1102 | contexts.append(t: definition.firstContext); |
1103 | |
1104 | for (int i = 0; i < contexts.size(); ++i) { |
1105 | auto appendContext = [&](const Context *context) { |
1106 | if (context && !usedContexts.contains(value: context)) { |
1107 | contexts.append(t: context); |
1108 | usedContexts.insert(value: context); |
1109 | } |
1110 | }; |
1111 | |
1112 | const auto *context = contexts[i]; |
1113 | appendContext(context->lineEndContext.context); |
1114 | appendContext(context->lineEmptyContext.context); |
1115 | appendContext(context->fallthroughContext.context); |
1116 | |
1117 | for (auto &rule : context->rules) { |
1118 | appendContext(rule.context.context); |
1119 | } |
1120 | } |
1121 | } |
1122 | } |
1123 | |
1124 | return usedContexts; |
1125 | } |
1126 | |
1127 | struct RuleAndInclude { |
1128 | const Context::Rule *rule; |
1129 | const Context::Rule *includeRules; |
1130 | |
1131 | explicit operator bool() const |
1132 | { |
1133 | return rule; |
1134 | } |
1135 | }; |
1136 | |
1137 | struct IncludedRuleUnreachableBy { |
1138 | QList<RuleAndInclude> unreachableBy; |
1139 | bool alwaysUnreachable = true; |
1140 | }; |
1141 | |
1142 | //! Check contexts and rules |
1143 | bool checkContexts(const Definition &definition, |
1144 | QSet<ItemDatas::Style> &usedAttributeNames, |
1145 | QSet<ItemDatas::Style> &ignoredAttributeNames, |
1146 | const QSet<const Context *> &usedContexts, |
1147 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const |
1148 | { |
1149 | bool success = true; |
1150 | |
1151 | QMapIterator<QString, Context> contextIt(definition.contexts); |
1152 | while (contextIt.hasNext()) { |
1153 | contextIt.next(); |
1154 | |
1155 | const auto &context = contextIt.value(); |
1156 | const auto &filename = definition.filename; |
1157 | |
1158 | if (!usedContexts.contains(value: &context)) { |
1159 | qWarning() << filename << "line" << context.line << "unused context:" << context.name; |
1160 | success = false; |
1161 | continue; |
1162 | } |
1163 | |
1164 | if (context.name.startsWith(QStringLiteral("#pop" ))) { |
1165 | qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name; |
1166 | success = false; |
1167 | } |
1168 | |
1169 | if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) { |
1170 | usedAttributeNames.insert(value: {.name: context.attribute, .line: context.line}); |
1171 | } |
1172 | |
1173 | success = checkContextAttribute(definition, context) && success; |
1174 | success = checkUreachableRules(filename: definition.filename, context, unreachableIncludedRules) && success; |
1175 | success = suggestRuleMerger(filename: definition.filename, context) && success; |
1176 | |
1177 | for (const auto &rule : context.rules) { |
1178 | if (!rule.attribute.isEmpty()) { |
1179 | if (rule.lookAhead != XmlBool::True) { |
1180 | usedAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line}); |
1181 | } else { |
1182 | ignoredAttributeNames.insert(value: {.name: rule.attribute, .line: rule.line}); |
1183 | } |
1184 | } |
1185 | success = checkLookAhead(rule) && success; |
1186 | success = checkStringDetect(rule) && success; |
1187 | success = checkKeyword(definition, rule) && success; |
1188 | success = checkRegExpr(filename, rule, context) && success; |
1189 | success = checkDelimiters(definition, rule) && success; |
1190 | } |
1191 | } |
1192 | |
1193 | return success; |
1194 | } |
1195 | |
1196 | //! Check that a regular expression in a RegExpr rule: |
1197 | //! - isValid() |
1198 | //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. |
1199 | //! - dynamic=true but no place holder used? |
1200 | //! - is not . with lookAhead="1" |
1201 | //! - is not ^... without column ou firstNonSpace attribute |
1202 | //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar |
1203 | //! - has no unused captures |
1204 | //! - has no unnecessary quantifier with lookAhead |
1205 | bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const |
1206 | { |
1207 | // ignore empty regex because the error is raised during xml parsing |
1208 | if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) { |
1209 | const QRegularExpression regexp(rule.string); |
1210 | if (!checkRegularExpression(filename: rule.filename, regexp, line: rule.line)) { |
1211 | return false; |
1212 | } |
1213 | |
1214 | // dynamic == true and no place holder? |
1215 | if (rule.dynamic == XmlBool::True) { |
1216 | static const QRegularExpression placeHolder(QStringLiteral("%\\d+" )); |
1217 | if (!rule.string.contains(re: placeHolder)) { |
1218 | qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder" ; |
1219 | return false; |
1220 | } |
1221 | } |
1222 | |
1223 | auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string; |
1224 | if (rule.lookAhead == XmlBool::True) { |
1225 | static const QRegularExpression removeAllSuffix(QStringLiteral( |
1226 | R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)" )); |
1227 | reg.replace(re: removeAllSuffix, after: QString()); |
1228 | } |
1229 | |
1230 | reg.replace(QStringLiteral("{1}" ), after: QString()); |
1231 | |
1232 | // is DetectSpaces |
1233 | // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, + |
1234 | static const QRegularExpression isDetectSpaces( |
1235 | QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)" )); |
1236 | if (rule.string.contains(re: isDetectSpaces)) { |
1237 | char const * = rule.string.contains(c: QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "" ; |
1238 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":" |
1239 | << rule.string; |
1240 | return false; |
1241 | } |
1242 | |
1243 | #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))" |
1244 | #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])" |
1245 | |
1246 | // is RangeDetect |
1247 | static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:" |
1248 | "\\.\\*[?+]?" REG_CHAR "|" |
1249 | "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1" |
1250 | ")$" )); |
1251 | if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?" )) |
1252 | || rule.string.contains(QStringLiteral("[^" ))) |
1253 | && reg.contains(re: isRange)) { |
1254 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string; |
1255 | return false; |
1256 | } |
1257 | |
1258 | // is AnyChar |
1259 | static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)" )); |
1260 | if (rule.string.contains(re: isAnyChar)) { |
1261 | auto = (reg[0] == QLatin1Char('^') || reg[1] == QLatin1Char('^')) ? "with column=\"0\"" : "" ; |
1262 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra; |
1263 | return false; |
1264 | } |
1265 | |
1266 | // is LineContinue |
1267 | static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$" )); |
1268 | if (reg.contains(re: isLineContinue)) { |
1269 | auto = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "" ; |
1270 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra; |
1271 | return false; |
1272 | } |
1273 | |
1274 | // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _ |
1275 | static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR)); |
1276 | reg.replace(re: sanitize1, QStringLiteral("_" )); |
1277 | |
1278 | #undef REG_CHAR |
1279 | #undef REG_ESCAPE_CHAR |
1280 | |
1281 | // use minimal or lazy operator |
1282 | static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]" )); |
1283 | static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]" )); |
1284 | |
1285 | if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: isMinimal) && !reg.contains(re: hasNotGreedy) |
1286 | && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0) |
1287 | && (reg.back() != QLatin1Char('$') || reg.contains(c: QLatin1Char('|')))) { |
1288 | qWarning() << rule.filename << "line" << rule.line |
1289 | << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string; |
1290 | return false; |
1291 | } |
1292 | |
1293 | // replace [:...:] with ___ |
1294 | static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])" )); |
1295 | reg.replace(re: sanitize2, QStringLiteral("___" )); |
1296 | |
1297 | // replace [ccc...], [special] with ... |
1298 | static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))" )); |
1299 | reg.replace(re: sanitize3, QStringLiteral("...\\1" )); |
1300 | |
1301 | // replace [c] with _ |
1302 | static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])" )); |
1303 | reg.replace(re: sanitize4, QStringLiteral("_" )); |
1304 | |
1305 | const int len = reg.size(); |
1306 | // replace [cC] with _ |
1307 | static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])" )); |
1308 | reg = reg.toUpper(); |
1309 | reg.replace(re: toInsensitive, after: QString()); |
1310 | |
1311 | // is StringDetect |
1312 | // ignore (?:, ) and {n} |
1313 | static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)" )); |
1314 | if (reg.contains(re: isStringDetect)) { |
1315 | char const * = rule.string.contains(c: QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "" ; |
1316 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg |
1317 | << ":" << rule.string; |
1318 | if (len != reg.size()) { |
1319 | qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string; |
1320 | } |
1321 | return false; |
1322 | } |
1323 | |
1324 | // column="0" |
1325 | if (rule.column == -1) { |
1326 | // ^ without | |
1327 | // (^sas*) -> ok |
1328 | // (^sa|s*) -> ko |
1329 | // (^(sa|s*)) -> ok |
1330 | auto first = std::as_const(t&: reg).begin(); |
1331 | auto last = std::as_const(t&: reg).end(); |
1332 | int depth = 0; |
1333 | |
1334 | while (QLatin1Char('(') == *first) { |
1335 | ++depth; |
1336 | ++first; |
1337 | if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) { |
1338 | first += 2; |
1339 | } |
1340 | } |
1341 | |
1342 | if (QLatin1Char('^') == *first) { |
1343 | const int bolDepth = depth; |
1344 | bool replace = true; |
1345 | |
1346 | while (++first != last) { |
1347 | if (QLatin1Char('(') == *first) { |
1348 | ++depth; |
1349 | } else if (QLatin1Char(')') == *first) { |
1350 | --depth; |
1351 | if (depth < bolDepth) { |
1352 | // (^a)? === (^a|) -> ko |
1353 | if (first + 1 != last && QStringLiteral("*?" ).contains(c: first[1])) { |
1354 | replace = false; |
1355 | break; |
1356 | } |
1357 | } |
1358 | } else if (QLatin1Char('|') == *first) { |
1359 | // ignore '|' within subgroup |
1360 | if (depth <= bolDepth) { |
1361 | replace = false; |
1362 | break; |
1363 | } |
1364 | } |
1365 | } |
1366 | |
1367 | if (replace) { |
1368 | qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string; |
1369 | return false; |
1370 | } |
1371 | } |
1372 | } |
1373 | |
1374 | // add ^ with column=0 |
1375 | if (rule.column == 0 && !rule.isDotRegex) { |
1376 | bool hasStartOfLine = false; |
1377 | auto first = std::as_const(t&: reg).begin(); |
1378 | auto last = std::as_const(t&: reg).end(); |
1379 | for (; first != last; ++first) { |
1380 | if (*first == QLatin1Char('^')) { |
1381 | hasStartOfLine = true; |
1382 | break; |
1383 | } else if (*first == QLatin1Char('(')) { |
1384 | if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) { |
1385 | first += 2; |
1386 | } |
1387 | } else { |
1388 | break; |
1389 | } |
1390 | } |
1391 | |
1392 | if (!hasStartOfLine) { |
1393 | qWarning() << rule.filename << "line" << rule.line |
1394 | << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string; |
1395 | return false; |
1396 | } |
1397 | } |
1398 | |
1399 | bool useCapture = false; |
1400 | |
1401 | // detection of unnecessary capture |
1402 | if (regexp.captureCount()) { |
1403 | auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) { |
1404 | int maxCapture = 9; |
1405 | while (maxCapture && !s.contains(s: referenceNames[maxCapture - 1])) { |
1406 | --maxCapture; |
1407 | } |
1408 | return maxCapture; |
1409 | }; |
1410 | |
1411 | int maxCaptureUsed = 0; |
1412 | // maximal dynamic reference |
1413 | if (rule.context.context && !rule.context.stay) { |
1414 | for (const auto &nextRule : rule.context.context->rules) { |
1415 | if (nextRule.dynamic == XmlBool::True) { |
1416 | static const QString cap[]{ |
1417 | QStringLiteral("%1" ), |
1418 | QStringLiteral("%2" ), |
1419 | QStringLiteral("%3" ), |
1420 | QStringLiteral("%4" ), |
1421 | QStringLiteral("%5" ), |
1422 | QStringLiteral("%6" ), |
1423 | QStringLiteral("%7" ), |
1424 | QStringLiteral("%8" ), |
1425 | QStringLiteral("%9" ), |
1426 | }; |
1427 | int maxDynamicCapture = maximalCapture(cap, nextRule.string); |
1428 | maxCaptureUsed = std::max(a: maxCaptureUsed, b: maxDynamicCapture); |
1429 | } |
1430 | } |
1431 | } |
1432 | |
1433 | static const QString num1[]{ |
1434 | QStringLiteral("\\1" ), |
1435 | QStringLiteral("\\2" ), |
1436 | QStringLiteral("\\3" ), |
1437 | QStringLiteral("\\4" ), |
1438 | QStringLiteral("\\5" ), |
1439 | QStringLiteral("\\6" ), |
1440 | QStringLiteral("\\7" ), |
1441 | QStringLiteral("\\8" ), |
1442 | QStringLiteral("\\9" ), |
1443 | }; |
1444 | static const QString num2[]{ |
1445 | QStringLiteral("\\g1" ), |
1446 | QStringLiteral("\\g2" ), |
1447 | QStringLiteral("\\g3" ), |
1448 | QStringLiteral("\\g4" ), |
1449 | QStringLiteral("\\g5" ), |
1450 | QStringLiteral("\\g6" ), |
1451 | QStringLiteral("\\g7" ), |
1452 | QStringLiteral("\\g8" ), |
1453 | QStringLiteral("\\g9" ), |
1454 | }; |
1455 | const int maxBackReference = std::max(a: maximalCapture(num1, rule.string), b: maximalCapture(num1, rule.string)); |
1456 | |
1457 | const int maxCapture = std::max(a: maxCaptureUsed, b: maxBackReference); |
1458 | |
1459 | if (maxCapture && regexp.captureCount() > maxCapture) { |
1460 | qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture |
1461 | << "are used. Please, replace '(...)' with '(?:...)':" << rule.string; |
1462 | return false; |
1463 | } |
1464 | |
1465 | useCapture = maxCapture; |
1466 | } |
1467 | |
1468 | if (!useCapture) { |
1469 | // is DetectIdentifier |
1470 | static const QRegularExpression isDetectIdentifier( |
1471 | QStringLiteral(R"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)" )); |
1472 | if (rule.string.contains(re: isDetectIdentifier)) { |
1473 | qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string; |
1474 | return false; |
1475 | } |
1476 | } |
1477 | |
1478 | if (rule.isDotRegex) { |
1479 | // search next rule with same column or firstNonSpace |
1480 | int i = &rule - context.rules.data() + 1; |
1481 | const bool hasColumn = (rule.column != -1); |
1482 | const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True); |
1483 | const bool isSpecial = (hasColumn || hasFirstNonSpace); |
1484 | for (; i < context.rules.size(); ++i) { |
1485 | auto &rule2 = context.rules[i]; |
1486 | if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) { |
1487 | i = context.rules.size(); |
1488 | break; |
1489 | } |
1490 | |
1491 | const bool hasColumn2 = (rule2.column != -1); |
1492 | const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True); |
1493 | if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column) |
1494 | || (hasFirstNonSpace && hasFirstNonSpace2)) { |
1495 | break; |
1496 | } |
1497 | } |
1498 | |
1499 | auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in " ) + rule.filename; |
1500 | if (i == context.rules.size()) { |
1501 | if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty() |
1502 | && rule.endRegion.isEmpty() && !useCapture) { |
1503 | qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename |
1504 | << "should be replaced by fallthroughContext:" << rule.string; |
1505 | } |
1506 | } else { |
1507 | auto &nextRule = context.rules[i]; |
1508 | auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in " ) + nextRule.filename; |
1509 | qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename |
1510 | << "because a dot RegExpr is used line" << rule.line << ruleFilename; |
1511 | } |
1512 | |
1513 | // unnecessary quantifier |
1514 | static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)" )); |
1515 | static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)" )); |
1516 | auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2; |
1517 | if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(re: unnecessaryQuantifier)) { |
1518 | qWarning() << rule.filename << "line" << rule.line |
1519 | << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string; |
1520 | return false; |
1521 | } |
1522 | } |
1523 | } |
1524 | |
1525 | return true; |
1526 | } |
1527 | |
1528 | // Parse and check <emptyLine> |
1529 | bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml) |
1530 | { |
1531 | bool success = true; |
1532 | |
1533 | QString pattern; |
1534 | XmlBool casesensitive{}; |
1535 | |
1536 | for (auto &attr : xml.attributes()) { |
1537 | Parser parser{.filename: filename, .xml: xml, .attr: attr, .success: success}; |
1538 | |
1539 | const bool = |
1540 | parser.extractString(str&: pattern, QStringLiteral("regexpr" )) || parser.extractXmlBool(xmlBool&: casesensitive, QStringLiteral("casesensitive" )); |
1541 | |
1542 | success = parser.checkIfExtracted(isExtracted); |
1543 | } |
1544 | |
1545 | if (pattern.isEmpty()) { |
1546 | qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr" ; |
1547 | success = false; |
1548 | } else { |
1549 | success = checkRegularExpression(filename, regexp: QRegularExpression(pattern), line: xml.lineNumber()); |
1550 | } |
1551 | |
1552 | return success; |
1553 | } |
1554 | |
1555 | //! Check that a regular expression: |
1556 | //! - isValid() |
1557 | //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. |
1558 | bool checkRegularExpression(const QString &filename, const QRegularExpression ®exp, int line) const |
1559 | { |
1560 | const auto pattern = regexp.pattern(); |
1561 | |
1562 | // validate regexp |
1563 | if (!regexp.isValid()) { |
1564 | qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset" |
1565 | << regexp.patternErrorOffset(); |
1566 | return false; |
1567 | } |
1568 | |
1569 | // catch possible case typos: [A-z] or [a-Z] |
1570 | const int azOffset = std::max(a: pattern.indexOf(QStringLiteral("A-z" )), b: pattern.indexOf(QStringLiteral("a-Z" ))); |
1571 | if (azOffset >= 0) { |
1572 | qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset; |
1573 | return false; |
1574 | } |
1575 | |
1576 | return true; |
1577 | } |
1578 | |
1579 | //! Check fallthrough and fallthroughContext. |
1580 | //! Check kateversion for stopEmptyLineContextSwitchLoop. |
1581 | bool checkContextAttribute(const Definition &definition, const Context &context) const |
1582 | { |
1583 | bool success = true; |
1584 | |
1585 | if (!context.fallthroughContext.name.isEmpty()) { |
1586 | const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62}; |
1587 | if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) { |
1588 | qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context" |
1589 | << context.name; |
1590 | success = false; |
1591 | } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) { |
1592 | qWarning() << definition.filename << "line" << context.line |
1593 | << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context" |
1594 | << context.name; |
1595 | success = false; |
1596 | } |
1597 | } |
1598 | |
1599 | if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < Version{5, 103}) { |
1600 | qWarning() << definition.filename << "line" << context.line |
1601 | << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name; |
1602 | success = false; |
1603 | } |
1604 | |
1605 | return success; |
1606 | } |
1607 | |
1608 | //! Search for additionalDeliminator/weakDeliminator which has no effect. |
1609 | bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const |
1610 | { |
1611 | if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) { |
1612 | return true; |
1613 | } |
1614 | |
1615 | bool success = true; |
1616 | |
1617 | if (definition.kateVersion < Version{5, 79}) { |
1618 | qWarning() << definition.filename << "line" << rule.line |
1619 | << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion." ; |
1620 | success = false; |
1621 | } |
1622 | |
1623 | for (QChar c : rule.additionalDeliminator) { |
1624 | if (!definition.wordDelimiters.contains(c)) { |
1625 | return success; |
1626 | } |
1627 | } |
1628 | |
1629 | for (QChar c : rule.weakDeliminator) { |
1630 | if (definition.wordDelimiters.contains(c)) { |
1631 | return success; |
1632 | } |
1633 | } |
1634 | |
1635 | qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string; |
1636 | return false; |
1637 | } |
1638 | |
1639 | //! Check that keyword rule reference an existing keyword list. |
1640 | bool checkKeyword(const Definition &definition, const Context::Rule &rule) const |
1641 | { |
1642 | if (rule.type == Context::Rule::Type::keyword) { |
1643 | auto it = definition.keywordsList.find(key: rule.string); |
1644 | if (it == definition.keywordsList.end()) { |
1645 | qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string; |
1646 | return false; |
1647 | } |
1648 | } |
1649 | return true; |
1650 | } |
1651 | |
1652 | //! Search for rules with lookAhead="true" and context="#stay". |
1653 | //! This would cause an infinite loop. |
1654 | bool (const Context::Rule &rule) const |
1655 | { |
1656 | if (rule.lookAhead == XmlBool::True && rule.context.stay) { |
1657 | qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay" ; |
1658 | } |
1659 | return true; |
1660 | } |
1661 | |
1662 | //! Check that StringDetect contains a placeHolder when dynamic="1" |
1663 | bool checkStringDetect(const Context::Rule &rule) const |
1664 | { |
1665 | if (rule.type == Context::Rule::Type::StringDetect) { |
1666 | // dynamic == true and no place holder? |
1667 | if (rule.dynamic == XmlBool::True) { |
1668 | static const QRegularExpression placeHolder(QStringLiteral("%\\d+" )); |
1669 | if (!rule.string.contains(re: placeHolder)) { |
1670 | qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder" ; |
1671 | return false; |
1672 | } |
1673 | } |
1674 | } |
1675 | return true; |
1676 | } |
1677 | |
1678 | //! Check \<include> and delimiter in a keyword list |
1679 | bool checkKeywordsList(const Definition &definition) const |
1680 | { |
1681 | bool success = true; |
1682 | |
1683 | bool includeNotSupport = (definition.kateVersion < Version{5, 53}); |
1684 | QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList); |
1685 | while (keywordsIt.hasNext()) { |
1686 | keywordsIt.next(); |
1687 | |
1688 | for (const auto &include : keywordsIt.value().items.includes) { |
1689 | if (includeNotSupport) { |
1690 | qWarning() << definition.filename << "line" << include.line |
1691 | << "<include> is only available since version \"5.53\". Please, increase kateversion." ; |
1692 | success = false; |
1693 | } |
1694 | success = checkKeywordInclude(definition, include) && success; |
1695 | } |
1696 | |
1697 | // Check that keyword list items do not have deliminator character |
1698 | #if 0 |
1699 | for (const auto& keyword : keywordsIt.value().items.keywords) { |
1700 | for (QChar c : keyword.content) { |
1701 | if (definition.wordDelimiters.contains(c)) { |
1702 | qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content; |
1703 | success = false; |
1704 | } |
1705 | } |
1706 | } |
1707 | #endif |
1708 | } |
1709 | |
1710 | return success; |
1711 | } |
1712 | |
1713 | //! Search for non-existing keyword include. |
1714 | bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const |
1715 | { |
1716 | bool containsKeywordName = true; |
1717 | int const idx = include.content.indexOf(QStringLiteral("##" )); |
1718 | if (idx == -1) { |
1719 | auto it = definition.keywordsList.find(key: include.content); |
1720 | containsKeywordName = (it != definition.keywordsList.end()); |
1721 | } else { |
1722 | auto defName = include.content.mid(position: idx + 2); |
1723 | auto listName = include.content.left(n: idx); |
1724 | auto it = m_definitions.find(key: defName); |
1725 | if (it == m_definitions.end()) { |
1726 | qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content; |
1727 | return false; |
1728 | } |
1729 | containsKeywordName = it->keywordsList.contains(key: listName); |
1730 | } |
1731 | |
1732 | if (!containsKeywordName) { |
1733 | qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content; |
1734 | } |
1735 | |
1736 | return containsKeywordName; |
1737 | } |
1738 | |
1739 | //! Check if a rule is hidden by another |
1740 | //! - rule hidden by DetectChar or AnyChar |
1741 | //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar |
1742 | //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings |
1743 | //! - duplicate rule (Int, Float, keyword with same String, etc) |
1744 | //! - Rule hidden by a dot regex |
1745 | bool checkUreachableRules(const QString &filename, |
1746 | const Context &context, |
1747 | QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const |
1748 | { |
1749 | if (context.isOnlyIncluded) { |
1750 | return true; |
1751 | } |
1752 | |
1753 | struct Rule4 { |
1754 | RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr) |
1755 | { |
1756 | auto set = [&](RuleAndInclude &ruleAndInclude) { |
1757 | auto old = ruleAndInclude; |
1758 | ruleAndInclude = {.rule: &rule, .includeRules: includeRules}; |
1759 | return old; |
1760 | }; |
1761 | |
1762 | if (rule.firstNonSpace == XmlBool::True) { |
1763 | return set(firstNonSpace); |
1764 | } else if (rule.column == 0) { |
1765 | return set(column0); |
1766 | } else if (rule.column > 0) { |
1767 | return set(columnGreaterThan0[rule.column]); |
1768 | } else { |
1769 | return set(normal); |
1770 | } |
1771 | } |
1772 | |
1773 | private: |
1774 | RuleAndInclude normal; |
1775 | RuleAndInclude column0; |
1776 | QMap<int, RuleAndInclude> columnGreaterThan0; |
1777 | RuleAndInclude firstNonSpace; |
1778 | }; |
1779 | |
1780 | // Associate QChar with RuleAndInclude |
1781 | struct CharTable { |
1782 | /// Search RuleAndInclude associated with @p c. |
1783 | RuleAndInclude find(QChar c) const |
1784 | { |
1785 | if (c.unicode() < 128) { |
1786 | return m_asciiMap[c.unicode()]; |
1787 | } |
1788 | auto it = m_utf8Map.find(key: c); |
1789 | return it == m_utf8Map.end() ? RuleAndInclude{.rule: nullptr, .includeRules: nullptr} : it.value(); |
1790 | } |
1791 | |
1792 | /// Search RuleAndInclude associated with the characters of @p s. |
1793 | /// \return an empty QList when at least one character is not found. |
1794 | QList<RuleAndInclude> find(QStringView s) const |
1795 | { |
1796 | QList<RuleAndInclude> result; |
1797 | |
1798 | for (QChar c : s) { |
1799 | if (!find(c)) { |
1800 | return result; |
1801 | } |
1802 | } |
1803 | |
1804 | for (QChar c : s) { |
1805 | result.append(t: find(c)); |
1806 | } |
1807 | |
1808 | return result; |
1809 | } |
1810 | |
1811 | /// Associates @p c with a rule. |
1812 | void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1813 | { |
1814 | if (c.unicode() < 128) { |
1815 | m_asciiMap[c.unicode()] = {.rule: &rule, .includeRules: includeRule}; |
1816 | } else { |
1817 | m_utf8Map[c] = {.rule: &rule, .includeRules: includeRule}; |
1818 | } |
1819 | } |
1820 | |
1821 | /// Associates each character of @p s with a rule. |
1822 | void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1823 | { |
1824 | for (QChar c : s) { |
1825 | append(c, rule, includeRule); |
1826 | } |
1827 | } |
1828 | |
1829 | private: |
1830 | RuleAndInclude m_asciiMap[127]{}; |
1831 | QMap<QChar, RuleAndInclude> m_utf8Map; |
1832 | }; |
1833 | |
        // Groups one CharTable per placement constraint of a rule.
        // CharTableArray picks the members that apply to a given rule.
        struct Char4Tables {
            // always selected by CharTableArray, whatever the rule's constraints
            CharTable chars;
            // selected for rules with column == 0
            CharTable charsColumn0;
            // selected for rules with column > 0, keyed by that column
            QMap<int, CharTable> charsColumnGreaterThan0;
            // selected for rules with firstNonSpace == XmlBool::True
            CharTable charsFirstNonSpace;
        };
1840 | |
1841 | // View on Char4Tables members |
1842 | struct CharTableArray { |
1843 | // Append Char4Tables members that satisfies firstNonSpace and column. |
1844 | // Char4Tables::char is always added. |
1845 | CharTableArray(Char4Tables &tables, const Context::Rule &rule) |
1846 | { |
1847 | if (rule.firstNonSpace == XmlBool::True) { |
1848 | appendTable(t&: tables.charsFirstNonSpace); |
1849 | } |
1850 | |
1851 | if (rule.column == 0) { |
1852 | appendTable(t&: tables.charsColumn0); |
1853 | } else if (rule.column > 0) { |
1854 | appendTable(t&: tables.charsColumnGreaterThan0[rule.column]); |
1855 | } |
1856 | |
1857 | appendTable(t&: tables.chars); |
1858 | } |
1859 | |
1860 | // Removes Char4Tables::chars when the rule contains firstNonSpace or column |
1861 | void removeNonSpecialWhenSpecial() |
1862 | { |
1863 | if (m_size > 1) { |
1864 | --m_size; |
1865 | } |
1866 | } |
1867 | |
1868 | /// Search RuleAndInclude associated with @p c. |
1869 | RuleAndInclude find(QChar c) const |
1870 | { |
1871 | for (int i = 0; i < m_size; ++i) { |
1872 | if (auto ruleAndInclude = m_charTables[i]->find(c)) { |
1873 | return ruleAndInclude; |
1874 | } |
1875 | } |
1876 | return RuleAndInclude{.rule: nullptr, .includeRules: nullptr}; |
1877 | } |
1878 | |
1879 | /// Search RuleAndInclude associated with the characters of @p s. |
1880 | /// \return an empty QList when at least one character is not found. |
1881 | QList<RuleAndInclude> find(QStringView s) const |
1882 | { |
1883 | for (int i = 0; i < m_size; ++i) { |
1884 | auto result = m_charTables[i]->find(s); |
1885 | if (result.size()) { |
1886 | while (++i < m_size) { |
1887 | result.append(other: m_charTables[i]->find(s)); |
1888 | } |
1889 | return result; |
1890 | } |
1891 | } |
1892 | return QList<RuleAndInclude>(); |
1893 | } |
1894 | |
1895 | /// Associates @p c with a rule. |
1896 | void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1897 | { |
1898 | for (int i = 0; i < m_size; ++i) { |
1899 | m_charTables[i]->append(c, rule, includeRule); |
1900 | } |
1901 | } |
1902 | |
1903 | /// Associates each character of @p s with a rule. |
1904 | void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) |
1905 | { |
1906 | for (int i = 0; i < m_size; ++i) { |
1907 | m_charTables[i]->append(s, rule, includeRule); |
1908 | } |
1909 | } |
1910 | |
1911 | private: |
1912 | void appendTable(CharTable &t) |
1913 | { |
1914 | m_charTables[m_size] = &t; |
1915 | ++m_size; |
1916 | } |
1917 | |
1918 | CharTable *m_charTables[3]; |
1919 | int m_size = 0; |
1920 | }; |
1921 | |
        // A rule to inspect, paired with the IncludeRules rule it is attributed to.
        struct ObservableRule {
            // the rule being inspected
            const Context::Rule *rule;
            // IncludeRules rule that brought `rule` in, `rule` itself for such an
            // IncludeRules entry, or nullptr when no IncludeRules is attached
            const Context::Rule *includeRules;

            // true when this entry is itself the IncludeRules rule it refers to
            bool hasResolvedIncludeRules() const
            {
                return rule == includeRules;
            }
        };
1931 | |
1932 | // Iterates over all the rules, including those in includedRules |
1933 | struct RuleIterator { |
1934 | RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule) |
1935 | : m_end(&endRule - rules.data()) |
1936 | , m_rules(rules) |
1937 | { |
1938 | } |
1939 | |
1940 | /// \return next rule or nullptr |
1941 | const Context::Rule *next() |
1942 | { |
1943 | // if in includedRules |
1944 | if (m_includedRules) { |
1945 | ++m_i2; |
1946 | if (m_i2 != m_includedRules->size()) { |
1947 | return (*m_includedRules)[m_i2]; |
1948 | } |
1949 | ++m_i; |
1950 | m_includedRules = nullptr; |
1951 | } |
1952 | |
1953 | // if is a includedRules |
1954 | while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) { |
1955 | if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) { |
1956 | m_i2 = 0; |
1957 | m_includedRules = &m_rules[m_i].rule->includedRules; |
1958 | return (*m_includedRules)[m_i2]; |
1959 | } |
1960 | ++m_i; |
1961 | } |
1962 | |
1963 | if (m_i < m_end) { |
1964 | ++m_i; |
1965 | return m_rules[m_i - 1].rule; |
1966 | } |
1967 | |
1968 | return nullptr; |
1969 | } |
1970 | |
1971 | /// \return current IncludeRules or nullptr |
1972 | const Context::Rule *currentIncludeRules() const |
1973 | { |
1974 | return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules; |
1975 | } |
1976 | |
1977 | private: |
1978 | int m_i = 0; |
1979 | int m_i2 = 0; |
1980 | const int m_end; |
1981 | const QList<ObservableRule> &m_rules; |
1982 | const QList<const Context::Rule *> *m_includedRules = nullptr; |
1983 | }; |
1984 | |
1985 | // Dot regex container that satisfies firstNonSpace and column. |
1986 | struct DotRegex { |
1987 | /// Append a dot regex rule. |
1988 | void append(const Context::Rule &rule, const Context::Rule *includedRule) |
1989 | { |
1990 | auto array = extractDotRegexes(rule); |
1991 | if (array[0]) { |
1992 | *array[0] = {.rule: &rule, .includeRules: includedRule}; |
1993 | } |
1994 | if (array[1]) { |
1995 | *array[1] = {.rule: &rule, .includeRules: includedRule}; |
1996 | } |
1997 | } |
1998 | |
1999 | /// Search dot regex which hides @p rule |
2000 | RuleAndInclude find(const Context::Rule &rule) |
2001 | { |
2002 | auto array = extractDotRegexes(rule); |
2003 | if (array[0]) { |
2004 | return *array[0]; |
2005 | } |
2006 | if (array[1]) { |
2007 | return *array[1]; |
2008 | } |
2009 | return RuleAndInclude{}; |
2010 | } |
2011 | |
2012 | private: |
2013 | using Array = std::array<RuleAndInclude *, 2>; |
2014 | |
2015 | Array (const Context::Rule &rule) |
2016 | { |
2017 | Array ret{}; |
2018 | |
2019 | if (rule.firstNonSpace != XmlBool::True && rule.column == -1) { |
2020 | ret[0] = &dotRegex; |
2021 | } else { |
2022 | if (rule.firstNonSpace == XmlBool::True) { |
2023 | ret[0] = &dotRegexFirstNonSpace; |
2024 | } |
2025 | |
2026 | if (rule.column == 0) { |
2027 | ret[1] = &dotRegexColumn0; |
2028 | } else if (rule.column > 0) { |
2029 | ret[1] = &dotRegexColumnGreaterThan0[rule.column]; |
2030 | } |
2031 | } |
2032 | |
2033 | return ret; |
2034 | } |
2035 | |
2036 | RuleAndInclude dotRegex{}; |
2037 | RuleAndInclude dotRegexColumn0{}; |
2038 | QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{}; |
2039 | RuleAndInclude dotRegexFirstNonSpace{}; |
2040 | }; |
2041 | |
2042 | bool success = true; |
2043 | |
2044 | // characters of DetectChar/AnyChar |
2045 | Char4Tables detectChars; |
2046 | // characters of dynamic DetectChar |
2047 | Char4Tables dynamicDetectChars; |
2048 | // characters of LineContinue |
2049 | Char4Tables lineContinueChars; |
2050 | |
2051 | Rule4 intRule{}; |
2052 | Rule4 floatRule{}; |
2053 | Rule4 hlCCharRule{}; |
2054 | Rule4 hlCOctRule{}; |
2055 | Rule4 hlCHexRule{}; |
2056 | Rule4 hlCStringCharRule{}; |
2057 | Rule4 detectIdentifierRule{}; |
2058 | |
2059 | // Contains includedRules and included includedRules |
2060 | QMap<Context const *, RuleAndInclude> includeContexts; |
2061 | |
2062 | DotRegex dotRegex; |
2063 | |
2064 | QList<ObservableRule> observedRules; |
2065 | observedRules.reserve(asize: context.rules.size()); |
2066 | for (const Context::Rule &rule : context.rules) { |
2067 | const Context::Rule *includeRule = nullptr; |
2068 | if (rule.type == Context::Rule::Type::IncludeRules) { |
2069 | auto *context = rule.context.context; |
2070 | if (context && context->isOnlyIncluded) { |
2071 | includeRule = &rule; |
2072 | } |
2073 | } |
2074 | |
2075 | observedRules.push_back(t: {.rule: &rule, .includeRules: includeRule}); |
2076 | if (includeRule) { |
2077 | for (const Context::Rule *rule2 : rule.includedRules) { |
2078 | observedRules.push_back(t: {.rule: rule2, .includeRules: includeRule}); |
2079 | } |
2080 | } |
2081 | } |
2082 | |
2083 | for (auto &observedRule : observedRules) { |
2084 | const Context::Rule &rule = *observedRule.rule; |
2085 | bool isUnreachable = false; |
2086 | QList<RuleAndInclude> unreachableBy; |
2087 | |
2088 | // declare rule as unreachable if ruleAndInclude is not empty |
2089 | auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) { |
2090 | if (ruleAndInclude) { |
2091 | isUnreachable = true; |
2092 | unreachableBy.append(t: ruleAndInclude); |
2093 | } |
2094 | }; |
2095 | |
2096 | // declare rule as unreachable if ruleAndIncludes is not empty |
2097 | auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) { |
2098 | if (!ruleAndIncludes.isEmpty()) { |
2099 | isUnreachable = true; |
2100 | unreachableBy.append(l: ruleAndIncludes); |
2101 | } |
2102 | }; |
2103 | |
2104 | // check if rule2.firstNonSpace/column is compatible with those of rule |
2105 | auto isCompatible = [&rule](Context::Rule const &rule2) { |
2106 | return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1) |
2107 | || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True); |
2108 | }; |
2109 | |
2110 | updateUnreachable1(dotRegex.find(rule)); |
2111 | |
2112 | switch (rule.type) { |
2113 | // checks if hidden by DetectChar/AnyChar |
2114 | // then add the characters to detectChars |
2115 | case Context::Rule::Type::AnyChar: { |
2116 | auto tables = CharTableArray(detectChars, rule); |
2117 | updateUnreachable2(tables.find(s: rule.string)); |
2118 | tables.removeNonSpecialWhenSpecial(); |
2119 | tables.append(s: rule.string, rule); |
2120 | break; |
2121 | } |
2122 | |
2123 | // check if is hidden by DetectChar/AnyChar |
2124 | // then add the characters to detectChars or dynamicDetectChars |
2125 | case Context::Rule::Type::DetectChar: { |
2126 | auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; |
2127 | auto tables = CharTableArray(chars4, rule); |
2128 | updateUnreachable1(tables.find(c: rule.char0)); |
2129 | tables.removeNonSpecialWhenSpecial(); |
2130 | tables.append(c: rule.char0, rule); |
2131 | break; |
2132 | } |
2133 | |
2134 | // check if hidden by DetectChar/AnyChar |
2135 | // then add spaces characters to detectChars |
2136 | case Context::Rule::Type::DetectSpaces: { |
2137 | auto tables = CharTableArray(detectChars, rule); |
2138 | updateUnreachable2(tables.find(QStringLiteral(" \t" ))); |
2139 | tables.removeNonSpecialWhenSpecial(); |
2140 | tables.append(c: QLatin1Char(' '), rule); |
2141 | tables.append(c: QLatin1Char('\t'), rule); |
2142 | break; |
2143 | } |
2144 | |
2145 | // check if hidden by DetectChar/AnyChar |
2146 | case Context::Rule::Type::HlCChar: |
2147 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: QLatin1Char('\''))); |
2148 | updateUnreachable1(hlCCharRule.setRule(rule)); |
2149 | break; |
2150 | |
2151 | // check if hidden by DetectChar/AnyChar |
2152 | case Context::Rule::Type::HlCHex: |
2153 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: QLatin1Char('0'))); |
2154 | updateUnreachable1(hlCHexRule.setRule(rule)); |
2155 | break; |
2156 | |
2157 | // check if hidden by DetectChar/AnyChar |
2158 | case Context::Rule::Type::HlCOct: |
2159 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: QLatin1Char('0'))); |
2160 | updateUnreachable1(hlCOctRule.setRule(rule)); |
2161 | break; |
2162 | |
2163 | // check if hidden by DetectChar/AnyChar |
2164 | case Context::Rule::Type::HlCStringChar: |
2165 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: QLatin1Char('\\'))); |
2166 | updateUnreachable1(hlCStringCharRule.setRule(rule)); |
2167 | break; |
2168 | |
2169 | // check if hidden by DetectChar/AnyChar |
2170 | case Context::Rule::Type::Int: |
2171 | updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789" ))); |
2172 | updateUnreachable1(intRule.setRule(rule)); |
2173 | break; |
2174 | |
2175 | // check if hidden by DetectChar/AnyChar |
2176 | case Context::Rule::Type::Float: |
2177 | updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789." ))); |
2178 | updateUnreachable1(floatRule.setRule(rule)); |
2179 | // check that Float is before Int |
2180 | updateUnreachable1(Rule4(intRule).setRule(rule)); |
2181 | break; |
2182 | |
2183 | // check if hidden by another DetectIdentifier rule |
2184 | case Context::Rule::Type::DetectIdentifier: |
2185 | updateUnreachable1(detectIdentifierRule.setRule(rule)); |
2186 | break; |
2187 | |
2188 | // check if hidden by DetectChar/AnyChar or another LineContinue |
2189 | case Context::Rule::Type::LineContinue: { |
2190 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0)); |
2191 | |
2192 | auto tables = CharTableArray(lineContinueChars, rule); |
2193 | updateUnreachable1(tables.find(c: rule.char0)); |
2194 | tables.removeNonSpecialWhenSpecial(); |
2195 | tables.append(c: rule.char0, rule); |
2196 | break; |
2197 | } |
2198 | |
2199 | // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect |
2200 | case Context::Rule::Type::Detect2Chars: |
2201 | case Context::Rule::Type::RangeDetect: |
2202 | updateUnreachable1(CharTableArray(detectChars, rule).find(c: rule.char0)); |
2203 | if (!isUnreachable) { |
2204 | RuleIterator ruleIterator(observedRules, observedRule); |
2205 | while (const auto *rulePtr = ruleIterator.next()) { |
2206 | if (isUnreachable) { |
2207 | break; |
2208 | } |
2209 | const auto &rule2 = *rulePtr; |
2210 | if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) { |
2211 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2212 | } |
2213 | } |
2214 | } |
2215 | break; |
2216 | |
2217 | case Context::Rule::Type::RegExpr: { |
2218 | if (rule.isDotRegex) { |
2219 | dotRegex.append(rule, includedRule: nullptr); |
2220 | break; |
2221 | } |
2222 | |
2223 | // check that `rule` does not have another RegExpr as a prefix |
2224 | RuleIterator ruleIterator(observedRules, observedRule); |
2225 | while (const auto *rulePtr = ruleIterator.next()) { |
2226 | if (isUnreachable) { |
2227 | break; |
2228 | } |
2229 | const auto &rule2 = *rulePtr; |
2230 | if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive |
2231 | && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(s: rule2.sanitizedString)) { |
2232 | bool add = (rule.sanitizedString.startsWith(s: rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2); |
2233 | if (!add) { |
2234 | // \s.* (sanitized = \s) is considered hiding \s*\S |
2235 | // we check the quantifiers to see if this is the case |
2236 | auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode(); |
2237 | auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode(); |
2238 | auto c3 = rule2.sanitizedString.back().unicode(); |
2239 | if (c3 == '*' || c3 == '?' || c3 == '+') { |
2240 | add = true; |
2241 | } else if (c1 == '*' || c1 == '?') { |
2242 | add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3)); |
2243 | } else { |
2244 | add = true; |
2245 | } |
2246 | } |
2247 | if (add) { |
2248 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2249 | } |
2250 | } |
2251 | } |
2252 | |
2253 | Q_FALLTHROUGH(); |
2254 | } |
2255 | // check if a rule does not have another rule as a prefix |
2256 | case Context::Rule::Type::WordDetect: |
2257 | case Context::Rule::Type::StringDetect: { |
2258 | // check that dynamic `rule` does not have another dynamic StringDetect as a prefix |
2259 | if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) { |
2260 | RuleIterator ruleIterator(observedRules, observedRule); |
2261 | while (const auto *rulePtr = ruleIterator.next()) { |
2262 | if (isUnreachable) { |
2263 | break; |
2264 | } |
2265 | |
2266 | const auto &rule2 = *rulePtr; |
2267 | if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) { |
2268 | continue; |
2269 | } |
2270 | |
2271 | const bool isSensitive = (rule2.insensitive == XmlBool::True); |
2272 | const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; |
2273 | if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(s: rule2.string, cs: caseSensitivity)) { |
2274 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2275 | } |
2276 | } |
2277 | } |
2278 | |
2279 | // string used for comparison and truncated from "dynamic" part |
2280 | QStringView s = rule.string; |
2281 | |
2282 | // truncate to '%' with dynamic rules |
2283 | if (rule.dynamic == XmlBool::True) { |
2284 | static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)" )); |
2285 | auto result = dynamicPosition.match(subject: rule.string); |
2286 | s = s.left(n: result.capturedLength()); |
2287 | } |
2288 | |
2289 | QString sanitizedRegex; |
2290 | // truncate to special character with RegExpr. |
2291 | // If regexp contains '|', `s` becomes empty. |
2292 | if (rule.type == Context::Rule::Type::RegExpr) { |
2293 | static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)" )); |
2294 | static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])" )); |
2295 | const qsizetype result = regularChars.match(subject: rule.string).capturedLength(); |
2296 | const qsizetype pos = qMin(a: result, b: s.size()); |
2297 | if (rule.string.indexOf(c: QLatin1Char('|'), from: pos) < pos) { |
2298 | sanitizedRegex = rule.string.left(n: qMin(a: result, b: s.size())); |
2299 | sanitizedRegex.replace(re: sanitizeChars, QStringLiteral("\\1" )); |
2300 | s = sanitizedRegex; |
2301 | } else { |
2302 | s = QStringView(); |
2303 | } |
2304 | } |
2305 | |
2306 | // check if hidden by DetectChar/AnyChar |
2307 | if (s.size() > 0) { |
2308 | auto t = CharTableArray(detectChars, rule); |
2309 | if (rule.insensitive != XmlBool::True) { |
2310 | updateUnreachable1(t.find(c: s[0])); |
2311 | } else { |
2312 | QChar c2[]{s[0].toLower(), s[0].toUpper()}; |
2313 | updateUnreachable2(t.find(s: QStringView(c2, 2))); |
2314 | } |
2315 | } |
2316 | |
2317 | // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s |
2318 | if (s.size() > 0 && !isUnreachable) { |
2319 | // combination of uppercase and lowercase |
2320 | RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}}; |
2321 | |
2322 | RuleIterator ruleIterator(observedRules, observedRule); |
2323 | while (const auto *rulePtr = ruleIterator.next()) { |
2324 | if (isUnreachable) { |
2325 | break; |
2326 | } |
2327 | const auto &rule2 = *rulePtr; |
2328 | const bool isSensitive = (rule2.insensitive == XmlBool::True); |
2329 | const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; |
2330 | |
2331 | switch (rule2.type) { |
2332 | // check that it is not a detectChars prefix |
2333 | case Context::Rule::Type::Detect2Chars: |
2334 | if (isCompatible(rule2) && s.size() >= 2) { |
2335 | if (rule.insensitive != XmlBool::True) { |
2336 | if (rule2.char0 == s[0] && rule2.char1 == s[1]) { |
2337 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2338 | } |
2339 | } else { |
2340 | // when the string is case insensitive, |
2341 | // all 4 upper/lower case combinations must be found |
2342 | auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) { |
2343 | if (!x && rule2.char0 == c1 && rule2.char0 == c2) { |
2344 | x = {.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}; |
2345 | } |
2346 | }; |
2347 | set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower()); |
2348 | set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper()); |
2349 | set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper()); |
2350 | set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower()); |
2351 | |
2352 | if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2] |
2353 | && detect2CharsInsensitives[3]) { |
2354 | isUnreachable = true; |
2355 | unreachableBy.append(t: detect2CharsInsensitives[0]); |
2356 | unreachableBy.append(t: detect2CharsInsensitives[1]); |
2357 | unreachableBy.append(t: detect2CharsInsensitives[2]); |
2358 | unreachableBy.append(t: detect2CharsInsensitives[3]); |
2359 | } |
2360 | } |
2361 | } |
2362 | break; |
2363 | |
2364 | // check that it is not a StringDetect prefix |
2365 | case Context::Rule::Type::StringDetect: |
2366 | if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True) |
2367 | && s.startsWith(s: rule2.string, cs: caseSensitivity)) { |
2368 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2369 | } |
2370 | break; |
2371 | |
2372 | // check if a WordDetect is hidden by another WordDetect |
2373 | case Context::Rule::Type::WordDetect: |
2374 | if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True) |
2375 | && 0 == rule.string.compare(s: rule2.string, cs: caseSensitivity)) { |
2376 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2377 | } |
2378 | break; |
2379 | |
2380 | default:; |
2381 | } |
2382 | } |
2383 | } |
2384 | |
2385 | break; |
2386 | } |
2387 | |
2388 | // check if hidden by another keyword rule |
2389 | case Context::Rule::Type::keyword: { |
2390 | RuleIterator ruleIterator(observedRules, observedRule); |
2391 | while (const auto *rulePtr = ruleIterator.next()) { |
2392 | if (isUnreachable) { |
2393 | break; |
2394 | } |
2395 | const auto &rule2 = *rulePtr; |
2396 | if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) { |
2397 | updateUnreachable1({.rule: &rule2, .includeRules: ruleIterator.currentIncludeRules()}); |
2398 | } |
2399 | } |
2400 | // TODO check that all keywords are hidden by another rules |
2401 | break; |
2402 | } |
2403 | |
            // Register the characters of the included rules as used, without checking
            // whether they were already registered.
            // <DetectChar char="}" />
            // <includedRules .../> <- references another <DetectChar char="}" />, which is not checked
            // <includedRules .../> <- references a <DetectChar char="{" />, which is registered
            // <DetectChar char="{" /> <- hidden by the previous rule
2409 | case Context::Rule::Type::IncludeRules: |
2410 | if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { |
2411 | break; |
2412 | } |
2413 | |
2414 | if (auto &ruleAndInclude = includeContexts[rule.context.context]) { |
2415 | updateUnreachable1(ruleAndInclude); |
2416 | } else { |
2417 | ruleAndInclude.rule = &rule; |
2418 | } |
2419 | |
2420 | for (const auto *rulePtr : rule.includedIncludeRules) { |
2421 | includeContexts.insert(key: rulePtr->context.context, value: RuleAndInclude{.rule: rulePtr, .includeRules: &rule}); |
2422 | } |
2423 | |
2424 | if (observedRule.includeRules) { |
2425 | break; |
2426 | } |
2427 | |
2428 | for (const auto *rulePtr : rule.includedRules) { |
2429 | const auto &rule2 = *rulePtr; |
2430 | switch (rule2.type) { |
2431 | case Context::Rule::Type::AnyChar: { |
2432 | auto tables = CharTableArray(detectChars, rule2); |
2433 | tables.removeNonSpecialWhenSpecial(); |
2434 | tables.append(s: rule2.string, rule: rule2, includeRule: &rule); |
2435 | break; |
2436 | } |
2437 | |
2438 | case Context::Rule::Type::DetectChar: { |
2439 | auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; |
2440 | auto tables = CharTableArray(chars4, rule2); |
2441 | tables.removeNonSpecialWhenSpecial(); |
2442 | tables.append(c: rule2.char0, rule: rule2, includeRule: &rule); |
2443 | break; |
2444 | } |
2445 | |
2446 | case Context::Rule::Type::DetectSpaces: { |
2447 | auto tables = CharTableArray(detectChars, rule2); |
2448 | tables.removeNonSpecialWhenSpecial(); |
2449 | tables.append(c: QLatin1Char(' '), rule: rule2, includeRule: &rule); |
2450 | tables.append(c: QLatin1Char('\t'), rule: rule2, includeRule: &rule); |
2451 | break; |
2452 | } |
2453 | |
2454 | case Context::Rule::Type::HlCChar: |
2455 | hlCCharRule.setRule(rule: rule2, includeRules: &rule); |
2456 | break; |
2457 | |
2458 | case Context::Rule::Type::HlCHex: |
2459 | hlCHexRule.setRule(rule: rule2, includeRules: &rule); |
2460 | break; |
2461 | |
2462 | case Context::Rule::Type::HlCOct: |
2463 | hlCOctRule.setRule(rule: rule2, includeRules: &rule); |
2464 | break; |
2465 | |
2466 | case Context::Rule::Type::HlCStringChar: |
2467 | hlCStringCharRule.setRule(rule: rule2, includeRules: &rule); |
2468 | break; |
2469 | |
2470 | case Context::Rule::Type::Int: |
2471 | intRule.setRule(rule: rule2, includeRules: &rule); |
2472 | break; |
2473 | |
2474 | case Context::Rule::Type::Float: |
2475 | floatRule.setRule(rule: rule2, includeRules: &rule); |
2476 | break; |
2477 | |
2478 | case Context::Rule::Type::LineContinue: { |
2479 | auto tables = CharTableArray(lineContinueChars, rule2); |
2480 | tables.removeNonSpecialWhenSpecial(); |
2481 | tables.append(c: rule2.char0, rule: rule2, includeRule: &rule); |
2482 | break; |
2483 | } |
2484 | |
2485 | case Context::Rule::Type::RegExpr: |
2486 | if (rule2.isDotRegex) { |
2487 | dotRegex.append(rule: rule2, includedRule: &rule); |
2488 | } |
2489 | break; |
2490 | |
2491 | case Context::Rule::Type::WordDetect: |
2492 | case Context::Rule::Type::StringDetect: |
2493 | case Context::Rule::Type::Detect2Chars: |
2494 | case Context::Rule::Type::IncludeRules: |
2495 | case Context::Rule::Type::DetectIdentifier: |
2496 | case Context::Rule::Type::keyword: |
2497 | case Context::Rule::Type::Unknown: |
2498 | case Context::Rule::Type::RangeDetect: |
2499 | break; |
2500 | } |
2501 | } |
2502 | break; |
2503 | |
2504 | case Context::Rule::Type::Unknown: |
2505 | break; |
2506 | } |
2507 | |
2508 | if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { |
2509 | auto &unreachableIncludedRule = unreachableIncludedRules[&rule]; |
2510 | if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) { |
2511 | unreachableIncludedRule.unreachableBy.append(l: unreachableBy); |
2512 | } else { |
2513 | unreachableIncludedRule.alwaysUnreachable = false; |
2514 | } |
2515 | } else if (isUnreachable) { |
2516 | success = false; |
2517 | QString message; |
2518 | message.reserve(asize: 128); |
2519 | for (auto &ruleAndInclude : unreachableBy) { |
2520 | message += QStringLiteral("line " ); |
2521 | if (ruleAndInclude.includeRules) { |
2522 | message += QString::number(ruleAndInclude.includeRules->line); |
2523 | message += QStringLiteral(" [by '" ); |
2524 | message += ruleAndInclude.includeRules->context.name; |
2525 | message += QStringLiteral("' line " ); |
2526 | message += QString::number(ruleAndInclude.rule->line); |
2527 | if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) { |
2528 | message += QStringLiteral(" (" ); |
2529 | message += ruleAndInclude.rule->filename; |
2530 | message += QLatin1Char(')'); |
2531 | } |
2532 | message += QLatin1Char(']'); |
2533 | } else { |
2534 | message += QString::number(ruleAndInclude.rule->line); |
2535 | } |
2536 | message += QStringLiteral(", " ); |
2537 | } |
2538 | message.chop(n: 2); |
2539 | qWarning() << filename << "line" << rule.line << "unreachable rule by" << message; |
2540 | } |
2541 | } |
2542 | |
2543 | return success; |
2544 | } |
2545 | |
2546 | //! Proposes to merge certain rule sequences |
2547 | //! - several DetectChar/AnyChar into AnyChar |
2548 | //! - several RegExpr into one RegExpr |
2549 | bool suggestRuleMerger(const QString &filename, const Context &context) const |
2550 | { |
2551 | bool success = true; |
2552 | |
2553 | if (context.rules.isEmpty()) { |
2554 | return success; |
2555 | } |
2556 | |
2557 | auto it = context.rules.begin(); |
2558 | const auto end = context.rules.end() - 1; |
2559 | |
2560 | for (; it < end; ++it) { |
2561 | auto &rule1 = *it; |
2562 | auto &rule2 = it[1]; |
2563 | |
2564 | auto isCommonCompatible = [&] { |
2565 | if (rule1.lookAhead != rule2.lookAhead) { |
2566 | return false; |
2567 | } |
2568 | // ignore attribute when lookAhead is true |
2569 | if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) { |
2570 | return false; |
2571 | } |
2572 | // clang-format off |
2573 | return rule1.beginRegion == rule2.beginRegion |
2574 | && rule1.endRegion == rule2.endRegion |
2575 | && rule1.firstNonSpace == rule2.firstNonSpace |
2576 | && rule1.context.context == rule2.context.context |
2577 | && rule1.context.popCount == rule2.context.popCount; |
2578 | // clang-format on |
2579 | }; |
2580 | |
2581 | switch (rule1.type) { |
2582 | // request to merge AnyChar/DetectChar |
2583 | case Context::Rule::Type::AnyChar: |
2584 | case Context::Rule::Type::DetectChar: |
2585 | if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible() |
2586 | && rule1.column == rule2.column) { |
2587 | qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule" ; |
2588 | success = false; |
2589 | } |
2590 | break; |
2591 | |
2592 | // request to merge multiple RegExpr |
2593 | case Context::Rule::Type::RegExpr: |
2594 | if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic |
2595 | && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) { |
2596 | qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule" ; |
2597 | success = false; |
2598 | } |
2599 | break; |
2600 | |
2601 | case Context::Rule::Type::DetectSpaces: |
2602 | case Context::Rule::Type::HlCChar: |
2603 | case Context::Rule::Type::HlCHex: |
2604 | case Context::Rule::Type::HlCOct: |
2605 | case Context::Rule::Type::HlCStringChar: |
2606 | case Context::Rule::Type::Int: |
2607 | case Context::Rule::Type::Float: |
2608 | case Context::Rule::Type::LineContinue: |
2609 | case Context::Rule::Type::WordDetect: |
2610 | case Context::Rule::Type::StringDetect: |
2611 | case Context::Rule::Type::Detect2Chars: |
2612 | case Context::Rule::Type::IncludeRules: |
2613 | case Context::Rule::Type::DetectIdentifier: |
2614 | case Context::Rule::Type::keyword: |
2615 | case Context::Rule::Type::Unknown: |
2616 | case Context::Rule::Type::RangeDetect: |
2617 | break; |
2618 | } |
2619 | } |
2620 | |
2621 | return success; |
2622 | } |
2623 | |
2624 | //! Initialize the referenced context (ContextName::context) |
2625 | //! Some input / output examples are: |
2626 | //! - "#stay" -> "" |
2627 | //! - "#pop" -> "" |
2628 | //! - "Comment" -> "Comment" |
2629 | //! - "#pop!Comment" -> "Comment" |
2630 | //! - "##ISO C++" -> "" |
2631 | //! - "Comment##ISO C++"-> "Comment" in ISO C++ |
2632 | void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line) |
2633 | { |
2634 | QStringView name = contextName.name; |
2635 | if (name.isEmpty()) { |
2636 | contextName.stay = true; |
2637 | } else if (name.startsWith(QStringLiteral("#stay" ))) { |
2638 | name = name.mid(pos: 5); |
2639 | contextName.stay = true; |
2640 | contextName.context = &context; |
2641 | if (!name.isEmpty()) { |
2642 | qWarning() << definition.filename << "line" << line << "invalid context in" << context.name; |
2643 | m_success = false; |
2644 | } |
2645 | } else { |
2646 | while (name.startsWith(QStringLiteral("#pop" ))) { |
2647 | name = name.mid(pos: 4); |
2648 | ++contextName.popCount; |
2649 | } |
2650 | |
2651 | if (contextName.popCount && !name.isEmpty()) { |
2652 | if (name.startsWith(c: QLatin1Char('!')) && name.size() > 1) { |
2653 | name = name.mid(pos: 1); |
2654 | } else { |
2655 | qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name; |
2656 | m_success = false; |
2657 | } |
2658 | } |
2659 | |
2660 | if (!name.isEmpty()) { |
2661 | const int idx = name.indexOf(QStringLiteral("##" )); |
2662 | if (idx == -1) { |
2663 | auto it = definition.contexts.find(key: name.toString()); |
2664 | if (it != definition.contexts.end()) { |
2665 | contextName.context = &*it; |
2666 | } |
2667 | } else { |
2668 | auto defName = name.mid(pos: idx + 2); |
2669 | auto it = m_definitions.find(key: defName.toString()); |
2670 | if (it != m_definitions.end()) { |
2671 | auto listName = name.left(n: idx).toString(); |
2672 | definition.referencedDefinitions.insert(value: &*it); |
2673 | auto ctxIt = it->contexts.find(key: listName.isEmpty() ? it->firstContextName : listName); |
2674 | if (ctxIt != it->contexts.end()) { |
2675 | contextName.context = &*ctxIt; |
2676 | } |
2677 | } else { |
2678 | qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name; |
2679 | m_success = false; |
2680 | } |
2681 | } |
2682 | |
2683 | if (!contextName.context) { |
2684 | qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name; |
2685 | m_success = false; |
2686 | } |
2687 | } |
2688 | } |
2689 | } |
2690 | |
2691 | QMap<QString, Definition> m_definitions; |
2692 | Definition *m_currentDefinition = nullptr; |
2693 | Keywords *m_currentKeywords = nullptr; |
2694 | Context *m_currentContext = nullptr; |
2695 | bool m_success = true; |
2696 | }; |
2697 | |
2698 | namespace |
2699 | { |
2700 | QStringList readListing(const QString &fileName) |
2701 | { |
2702 | QFile file(fileName); |
2703 | if (!file.open(flags: QIODevice::ReadOnly)) { |
2704 | return QStringList(); |
2705 | } |
2706 | |
2707 | QXmlStreamReader xml(&file); |
2708 | QStringList listing; |
2709 | while (!xml.atEnd()) { |
2710 | xml.readNext(); |
2711 | |
2712 | // add only .xml files, no .json or stuff |
2713 | if (xml.isCharacters() && xml.text().contains(s: QLatin1String(".xml" ))) { |
2714 | listing.append(t: xml.text().toString()); |
2715 | } |
2716 | } |
2717 | |
2718 | if (xml.hasError()) { |
2719 | qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset(); |
2720 | listing.clear(); |
2721 | } |
2722 | |
2723 | return listing; |
2724 | } |
2725 | |
2726 | /** |
2727 | * check if the "extensions" attribute have valid wildcards |
2728 | * @param extensions extensions string to check |
2729 | * @return valid? |
2730 | */ |
2731 | bool checkExtensions(QStringView extensions) |
2732 | { |
2733 | // get list of extensions |
2734 | const QList<QStringView> extensionParts = extensions.split(sep: QLatin1Char(';'), behavior: Qt::SkipEmptyParts); |
2735 | |
2736 | // ok if empty |
2737 | if (extensionParts.isEmpty()) { |
2738 | return true; |
2739 | } |
2740 | |
2741 | // check that only valid wildcard things are inside the parts |
2742 | for (const auto &extension : extensionParts) { |
2743 | for (const auto c : extension) { |
2744 | // eat normal things |
2745 | if (c.isDigit() || c.isLetter()) { |
2746 | continue; |
2747 | } |
2748 | |
2749 | // allow some special characters |
2750 | if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) { |
2751 | continue; |
2752 | } |
2753 | |
2754 | // only allowed wildcard things: '?' and '*' |
2755 | if (c == QLatin1Char('?') || c == QLatin1Char('*')) { |
2756 | continue; |
2757 | } |
2758 | |
2759 | qWarning() << "invalid character" << c << "seen in extensions wildcard" ; |
2760 | return false; |
2761 | } |
2762 | } |
2763 | |
2764 | // all checks passed |
2765 | return true; |
2766 | } |
2767 | |
2768 | } |
2769 | |
2770 | int main(int argc, char *argv[]) |
2771 | { |
2772 | // get app instance |
2773 | QCoreApplication app(argc, argv); |
2774 | |
2775 | // ensure enough arguments are passed |
2776 | if (app.arguments().size() < 3) { |
2777 | return 1; |
2778 | } |
2779 | |
2780 | #ifdef HAS_XERCESC |
2781 | // care for proper init and cleanup |
2782 | XMLPlatformUtils::Initialize(); |
2783 | auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate); |
2784 | |
2785 | /* |
2786 | * parse XSD first time and cache it |
2787 | */ |
2788 | XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager); |
2789 | |
2790 | // create parser for the XSD |
2791 | SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd); |
2792 | init_parser(parser); |
2793 | QString messages; |
2794 | CustomErrorHandler eh(&messages); |
2795 | parser.setErrorHandler(&eh); |
2796 | |
2797 | // load grammar into the pool, on error just abort |
2798 | const auto xsdFile = app.arguments().at(2); |
2799 | if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || eh.failed()) { |
2800 | qWarning("Failed to parse XSD %s: %s" , qPrintable(xsdFile), qPrintable(messages)); |
2801 | return 2; |
2802 | } |
2803 | |
2804 | // lock the pool, no later modifications wanted! |
2805 | xsd.lockPool(); |
2806 | #endif |
2807 | |
2808 | const QString hlFilenamesListing = app.arguments().value(i: 3); |
2809 | if (hlFilenamesListing.isEmpty()) { |
2810 | return 1; |
2811 | } |
2812 | |
2813 | QStringList hlFilenames = readListing(fileName: hlFilenamesListing); |
2814 | if (hlFilenames.isEmpty()) { |
2815 | qWarning(msg: "Failed to read %s" , qPrintable(hlFilenamesListing)); |
2816 | return 3; |
2817 | } |
2818 | |
2819 | // text attributes |
2820 | const QStringList textAttributes = QStringList() << QStringLiteral("name" ) << QStringLiteral("alternativeNames" ) << QStringLiteral("section" ) |
2821 | << QStringLiteral("mimetype" ) << QStringLiteral("extensions" ) << QStringLiteral("style" ) |
2822 | << QStringLiteral("author" ) << QStringLiteral("license" ) << QStringLiteral("indenter" ); |
2823 | |
2824 | // index all given highlightings |
2825 | HlFilesChecker filesChecker; |
2826 | QVariantMap hls; |
2827 | int anyError = 0; |
2828 | for (const QString &hlFilename : std::as_const(t&: hlFilenames)) { |
2829 | QFile hlFile(hlFilename); |
2830 | if (!hlFile.open(flags: QIODevice::ReadOnly)) { |
2831 | qWarning(msg: "Failed to open %s" , qPrintable(hlFilename)); |
2832 | anyError = 3; |
2833 | continue; |
2834 | } |
2835 | |
2836 | #ifdef HAS_XERCESC |
2837 | // create parser |
2838 | SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd); |
2839 | init_parser(parser); |
2840 | QString messages; |
2841 | CustomErrorHandler eh(&messages); |
2842 | parser.setErrorHandler(&eh); |
2843 | |
2844 | // parse the XML file |
2845 | parser.parse((const char16_t *)hlFile.fileName().utf16()); |
2846 | |
2847 | // report issues |
2848 | if (eh.failed()) { |
2849 | qWarning("Failed to validate XML %s: %s" , qPrintable(hlFile.fileName()), qPrintable(messages)); |
2850 | anyError = 4; |
2851 | continue; |
2852 | } |
2853 | #endif |
2854 | |
2855 | // read the needed attributes from toplevel language tag |
2856 | hlFile.reset(); |
2857 | QXmlStreamReader xml(&hlFile); |
2858 | if (xml.readNextStartElement()) { |
2859 | if (xml.name() != QLatin1String("language" )) { |
2860 | anyError = 5; |
2861 | continue; |
2862 | } |
2863 | } else { |
2864 | anyError = 6; |
2865 | continue; |
2866 | } |
2867 | |
2868 | // map to store hl info |
2869 | QVariantMap hl; |
2870 | |
2871 | // transfer text attributes |
2872 | for (const QString &attribute : std::as_const(t: textAttributes)) { |
2873 | hl[attribute] = xml.attributes().value(qualifiedName: attribute).toString(); |
2874 | } |
2875 | |
2876 | // check if extensions have the right format |
2877 | if (!checkExtensions(extensions: hl[QStringLiteral("extensions" )].toString())) { |
2878 | qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions" )].toString(); |
2879 | anyError = 23; |
2880 | } |
2881 | |
2882 | // numerical attributes |
2883 | hl[QStringLiteral("version" )] = xml.attributes().value(qualifiedName: QLatin1String("version" )).toInt(); |
2884 | hl[QStringLiteral("priority" )] = xml.attributes().value(qualifiedName: QLatin1String("priority" )).toInt(); |
2885 | |
2886 | // add boolean one |
2887 | hl[QStringLiteral("hidden" )] = attrToBool(str: xml.attributes().value(qualifiedName: QLatin1String("hidden" ))); |
2888 | |
2889 | // keep some strings as UTF-8 for faster translations |
2890 | hl[QStringLiteral("nameUtf8" )] = hl[QStringLiteral("name" )].toString().toUtf8(); |
2891 | hl[QStringLiteral("sectionUtf8" )] = hl[QStringLiteral("section" )].toString().toUtf8(); |
2892 | |
2893 | // remember hl |
2894 | hls[QFileInfo(hlFile).fileName()] = hl; |
2895 | |
2896 | const QString hlName = hl[QStringLiteral("name" )].toString(); |
2897 | |
2898 | filesChecker.setDefinition(verStr: xml.attributes().value(QStringLiteral("kateversion" )), filename: hlFilename, name: hlName); |
2899 | |
2900 | // scan for broken regex or keywords with spaces |
2901 | while (!xml.atEnd()) { |
2902 | xml.readNext(); |
2903 | filesChecker.processElement(xml); |
2904 | } |
2905 | |
2906 | if (xml.hasError()) { |
2907 | anyError = 33; |
2908 | qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset(); |
2909 | } |
2910 | } |
2911 | |
2912 | filesChecker.resolveContexts(); |
2913 | |
2914 | if (!filesChecker.check()) { |
2915 | anyError = 7; |
2916 | } |
2917 | |
2918 | // bail out if any problem was seen |
2919 | if (anyError) { |
2920 | return anyError; |
2921 | } |
2922 | |
2923 | // create outfile, after all has worked! |
2924 | QFile outFile(app.arguments().at(i: 1)); |
2925 | if (!outFile.open(flags: QIODevice::WriteOnly | QIODevice::Truncate)) { |
2926 | return 9; |
2927 | } |
2928 | |
2929 | // write out json |
2930 | outFile.write(data: QCborValue::fromVariant(variant: QVariant(hls)).toCbor()); |
2931 | |
2932 | // be done |
2933 | return 0; |
2934 | } |
2935 | |