1/*
2 SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "highlightingdata_p.hpp"
8#include "ksyntaxhighlighting_logging.h"
9#include "xml_p.h"
10
11#include <QXmlStreamReader>
12#include <QStringView>
13
14using namespace KSyntaxHighlighting;
15
/**
 * Constructs a rule payload in place inside already allocated (but not yet
 * constructed) storage, typically a member of the Rule data union.
 *
 * The arguments are perfectly forwarded to the brace-initializer of @p Data:
 * temporaries are moved into the payload, while named (lvalue) arguments are
 * copied and left untouched for the caller.
 *
 * @param data storage in which the Data object is constructed with placement new
 * @param args initializers for Data, in member order
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    // std::forward (not std::move): args are forwarding references, so an
    // unconditional move would silently steal from the caller's lvalues
    new (&data) Data{std::forward<Args>(args)...};
}
21
22static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str)
23{
24 return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive;
25}
26
27static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader)
28{
29 return HighlightingContextData::Rule::WordDelimiters{
30 reader.attributes().value(QLatin1String("additionalDeliminator")).toString(),
31 reader.attributes().value(QLatin1String("weakDeliminator")).toString(),
32 };
33}
34
35static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
36{
37 if (!str.isEmpty()) {
38 return true;
39 }
40
41 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty";
42 return false;
43}
44
45static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
46{
47 if (str.size() == 1) {
48 return true;
49 }
50
51 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character";
52 return false;
53}
54
55static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader)
56{
57 using Rule = HighlightingContextData::Rule;
58
59 QStringView name = reader.name();
60 const auto attrs = reader.attributes();
61 bool isIncludeRules = false;
62
63 if (name == QLatin1String("DetectChar")) {
64 const auto s = attrs.value(QLatin1String("char"));
65 if (!checkIsChar(s, "char", defName, reader)) {
66 return false;
67 }
68 const QChar c = s.at(0);
69 const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
70
71 initRuleData(rule.data.detectChar, c, dynamic);
72 rule.type = Rule::Type::DetectChar;
73 } else if (name == QLatin1String("RegExpr")) {
74 const auto pattern = attrs.value(QLatin1String("String"));
75 if (!checkIsNotEmpty(pattern, "String", defName, reader)) {
76 return false;
77 }
78
79 const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
80 const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal")));
81 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
82
83 initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic);
84 rule.type = Rule::Type::RegExpr;
85 } else if (name == QLatin1String("IncludeRules")) {
86 const auto context = attrs.value(QLatin1String("context"));
87 if (!checkIsNotEmpty(context, "context", defName, reader)) {
88 return false;
89 }
90 const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib")));
91
92 initRuleData(rule.data.includeRules, context.toString(), includeAttribute);
93 rule.type = Rule::Type::IncludeRules;
94 isIncludeRules = true;
95 } else if (name == QLatin1String("Detect2Chars")) {
96 const auto s1 = attrs.value(QLatin1String("char"));
97 const auto s2 = attrs.value(QLatin1String("char1"));
98 if (!checkIsChar(s1, "char", defName, reader)) {
99 return false;
100 }
101 if (!checkIsChar(s2, "char1", defName, reader)) {
102 return false;
103 }
104
105 initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0));
106 rule.type = Rule::Type::Detect2Chars;
107 } else if (name == QLatin1String("keyword")) {
108 const auto s = attrs.value(QLatin1String("String"));
109 if (!checkIsNotEmpty(s, "String", defName, reader)) {
110 return false;
111 }
112 Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive;
113 bool hasCaseSensitivityOverride = false;
114
115 /**
116 * we might overwrite the case sensitivity
117 * then we need to init the list for lookup of that sensitivity setting
118 */
119 if (attrs.hasAttribute(QLatin1String("insensitive"))) {
120 hasCaseSensitivityOverride = true;
121 caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
122 }
123
124 initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride);
125 rule.type = Rule::Type::Keyword;
126 } else if (name == QLatin1String("DetectSpaces")) {
127 rule.type = Rule::Type::DetectSpaces;
128 } else if (name == QLatin1String("StringDetect")) {
129 const auto string = attrs.value(QLatin1String("String"));
130 if (!checkIsNotEmpty(string, "String", defName, reader)) {
131 return false;
132 }
133 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
134 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
135 const bool isSensitive = (caseSensitivity == Qt::CaseSensitive);
136
137 // String can be replaced with DetectChar or AnyChar
138 if (!dynamic && string.size() == 1) {
139 QChar c = string.at(0);
140 if (isSensitive || c.toLower() == c.toUpper()) {
141 initRuleData(rule.data.detectChar, c, dynamic);
142 rule.type = Rule::Type::DetectChar;
143 } else {
144 initRuleData(rule.data.anyChar, c.toLower() + c.toUpper());
145 rule.type = Rule::Type::AnyChar;
146 }
147 }
148 // String can be replaced with Detect2Chars
149 else if (isSensitive && !dynamic && string.size() == 2) {
150 initRuleData(rule.data.detect2Chars, string.at(0), string.at(1));
151 rule.type = Rule::Type::Detect2Chars;
152 } else {
153 initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic);
154 rule.type = Rule::Type::StringDetect;
155 }
156 } else if (name == QLatin1String("WordDetect")) {
157 const auto word = attrs.value(QLatin1String("String"));
158 if (!checkIsNotEmpty(word, "String", defName, reader)) {
159 return false;
160 }
161 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
162
163 initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity);
164 rule.type = Rule::Type::WordDetect;
165 } else if (name == QLatin1String("AnyChar")) {
166 const auto chars = attrs.value(QLatin1String("String"));
167 if (!checkIsNotEmpty(chars, "String", defName, reader)) {
168 return false;
169 }
170
171 // AnyChar can be replaced with DetectChar
172 if (chars.size() == 1) {
173 initRuleData(rule.data.detectChar, chars.at(0), false);
174 rule.type = Rule::Type::DetectChar;
175 } else {
176 initRuleData(rule.data.anyChar, chars.toString());
177 rule.type = Rule::Type::AnyChar;
178 }
179 } else if (name == QLatin1String("DetectIdentifier")) {
180 rule.type = Rule::Type::DetectIdentifier;
181 } else if (name == QLatin1String("LineContinue")) {
182 const auto s = attrs.value(QLatin1String("char"));
183 const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0);
184
185 initRuleData(rule.data.lineContinue, c);
186 rule.type = Rule::Type::LineContinue;
187 } else if (name == QLatin1String("Int")) {
188 initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader));
189 rule.type = Rule::Type::Int;
190 } else if (name == QLatin1String("Float")) {
191 initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader));
192 rule.type = Rule::Type::Float;
193 } else if (name == QLatin1String("HlCStringChar")) {
194 rule.type = Rule::Type::HlCStringChar;
195 } else if (name == QLatin1String("RangeDetect")) {
196 const auto s1 = attrs.value(QLatin1String("char"));
197 const auto s2 = attrs.value(QLatin1String("char1"));
198 if (!checkIsChar(s1, "char", defName, reader)) {
199 return false;
200 }
201 if (!checkIsChar(s2, "char1", defName, reader)) {
202 return false;
203 }
204
205 initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0));
206 rule.type = Rule::Type::RangeDetect;
207 } else if (name == QLatin1String("HlCHex")) {
208 initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader));
209 rule.type = Rule::Type::HlCHex;
210 } else if (name == QLatin1String("HlCChar")) {
211 rule.type = Rule::Type::HlCChar;
212 } else if (name == QLatin1String("HlCOct")) {
213 initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader));
214 rule.type = Rule::Type::HlCOct;
215 } else {
216 qCWarning(Log) << "Unknown rule type:" << name;
217 return false;
218 }
219
220 if (!isIncludeRules) {
221 rule.common.contextName = attrs.value(QLatin1String("context")).toString();
222 rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString();
223 rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString();
224 rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace")));
225 rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead")));
226 // attribute is only used when lookAhead is false
227 if (!rule.common.lookAhead) {
228 rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString();
229 }
230 bool colOk = false;
231 rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk);
232 if (!colOk) {
233 rule.common.column = -1;
234 }
235 }
236
237 return true;
238}
239
240template<class Data1, class Data2, class Visitor>
241static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor)
242{
243 using Rule = HighlightingContextData::Rule;
244 using Type = Rule::Type;
245 switch (type) {
246 case Type::AnyChar:
247 visitor(data1.anyChar, data2.anyChar);
248 break;
249 case Type::DetectChar:
250 visitor(data1.detectChar, data2.detectChar);
251 break;
252 case Type::Detect2Chars:
253 visitor(data1.detect2Chars, data2.detect2Chars);
254 break;
255 case Type::HlCOct:
256 visitor(data1.hlCOct, data2.hlCOct);
257 break;
258 case Type::IncludeRules:
259 visitor(data1.includeRules, data2.includeRules);
260 break;
261 case Type::Int:
262 visitor(data1.detectInt, data2.detectInt);
263 break;
264 case Type::Keyword:
265 visitor(data1.keyword, data2.keyword);
266 break;
267 case Type::LineContinue:
268 visitor(data1.lineContinue, data2.lineContinue);
269 break;
270 case Type::RangeDetect:
271 visitor(data1.rangeDetect, data2.rangeDetect);
272 break;
273 case Type::RegExpr:
274 visitor(data1.regExpr, data2.regExpr);
275 break;
276 case Type::StringDetect:
277 visitor(data1.stringDetect, data2.stringDetect);
278 break;
279 case Type::WordDetect:
280 visitor(data1.wordDetect, data2.wordDetect);
281 break;
282 case Type::Float:
283 visitor(data1.detectFloat, data2.detectFloat);
284 break;
285 case Type::HlCHex:
286 visitor(data1.hlCHex, data2.hlCHex);
287 break;
288
289 case Type::HlCStringChar:
290 case Type::DetectIdentifier:
291 case Type::DetectSpaces:
292 case Type::HlCChar:
293 case Type::Unknown:;
294 }
295}
296
// Default constructor, defaulted out of line. Member initialization follows the
// class declaration, which lives in the header and is not visible in this file.
HighlightingContextData::Rule::Rule() noexcept = default;
298
299HighlightingContextData::Rule::Rule(Rule &&other) noexcept
300 : common(std::move(other.common))
301{
302 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
303 using Data = std::remove_reference_t<decltype(data1)>;
304 new (&data1) Data(std::move(data2));
305 });
306 type = other.type;
307}
308
309HighlightingContextData::Rule::Rule(const Rule &other)
310 : common(other.common)
311{
312 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
313 using Data = std::remove_reference_t<decltype(data1)>;
314 new (&data1) Data(data2);
315 });
316 type = other.type;
317}
318
319HighlightingContextData::Rule::~Rule()
320{
321 dataRuleVisit(type, data, data, [](auto &data, auto &) {
322 using Data = std::remove_reference_t<decltype(data)>;
323 data.~Data();
324 });
325}
326
327HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str)
328{
329 if (str.isEmpty() || str == QStringLiteral("#stay")) {
330 return;
331 }
332
333 while (str.startsWith(QStringLiteral("#pop"))) {
334 ++m_popCount;
335 if (str.size() > 4 && str.at(4) == QLatin1Char('!')) {
336 str = str.mid(5);
337 break;
338 }
339 str = str.mid(4);
340 }
341
342 if (str.isEmpty()) {
343 return;
344 }
345
346 m_contextAndDefName = str.toString();
347 m_defNameIndex = str.indexOf(QStringLiteral("##"));
348}
349
350bool HighlightingContextData::ContextSwitch::isStay() const
351{
352 return m_popCount == -1 && m_contextAndDefName.isEmpty();
353}
354
355QStringView HighlightingContextData::ContextSwitch::contextName() const
356{
357 if (m_defNameIndex == -1) {
358 return m_contextAndDefName;
359 }
360 return QStringView(m_contextAndDefName).left(m_defNameIndex);
361}
362
363QStringView HighlightingContextData::ContextSwitch::defName() const
364{
365 if (m_defNameIndex == -1) {
366 return QStringView();
367 }
368 return QStringView(m_contextAndDefName).mid(m_defNameIndex + 2);
369}
370
371void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader)
372{
373 Q_ASSERT(reader.name() == QLatin1String("context"));
374 Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement);
375
376 name = reader.attributes().value(QLatin1String("name")).toString();
377 attribute = reader.attributes().value(QLatin1String("attribute")).toString();
378 lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString();
379 lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString();
380 fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString();
381 noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding")));
382 stopEmptyLineContextSwitchLoop = Xml::attrToBool(reader.attributes().value(QLatin1String("stopEmptyLineContextSwitchLoop")));
383
384 rules.reserve(n: 8);
385
386 reader.readNext();
387 while (!reader.atEnd()) {
388 switch (reader.tokenType()) {
389 case QXmlStreamReader::StartElement: {
390 auto &rule = rules.emplace_back();
391 if (!loadRule(defName, rule, reader)) {
392 rules.pop_back();
393 }
394 // be done with this rule, skip all subelements, e.g. no longer supported sub-rules
395 reader.skipCurrentElement();
396 reader.readNext();
397 break;
398 }
399 case QXmlStreamReader::EndElement:
400 return;
401 default:
402 reader.readNext();
403 break;
404 }
405 }
406}
407

source code of syntax-highlighting/src/lib/highlightingdata.cpp