1 | /* |
2 | SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #include "highlightingdata_p.hpp" |
8 | #include "ksyntaxhighlighting_logging.h" |
9 | #include "xml_p.h" |
10 | |
11 | #include <QXmlStreamReader> |
12 | #include <QStringView> |
13 | |
14 | using namespace KSyntaxHighlighting; |
15 | |
/**
 * Constructs a @p Data object in place, in the storage occupied by @p data,
 * brace-initializing it from @p args.
 *
 * The arguments are perfectly forwarded: rvalues are moved into the new
 * object while lvalues are copied, so a caller's named variable is never
 * left in a moved-from state.
 *
 * @param data storage in which the new object is constructed (placement new);
 *             no destructor is run on @p data beforehand.
 * @param args initializers handed to @p Data's brace initialization.
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    new (&data) Data{std::forward<Args>(args)...};
}
21 | |
22 | static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str) |
23 | { |
24 | return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive; |
25 | } |
26 | |
27 | static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader) |
28 | { |
29 | return HighlightingContextData::Rule::WordDelimiters{ |
30 | .additionalDeliminator: reader.attributes().value(qualifiedName: QLatin1String("additionalDeliminator" )).toString(), |
31 | .weakDeliminator: reader.attributes().value(qualifiedName: QLatin1String("weakDeliminator" )).toString(), |
32 | }; |
33 | } |
34 | |
35 | static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) |
36 | { |
37 | if (!str.isEmpty()) { |
38 | return true; |
39 | } |
40 | |
41 | qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty" ; |
42 | return false; |
43 | } |
44 | |
45 | static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) |
46 | { |
47 | if (str.size() == 1) { |
48 | return true; |
49 | } |
50 | |
51 | qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character" ; |
52 | return false; |
53 | } |
54 | |
55 | static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader) |
56 | { |
57 | using Rule = HighlightingContextData::Rule; |
58 | |
59 | QStringView name = reader.name(); |
60 | const auto attrs = reader.attributes(); |
61 | bool isIncludeRules = false; |
62 | |
63 | if (name == QLatin1String("DetectChar" )) { |
64 | const auto s = attrs.value(qualifiedName: QLatin1String("char" )); |
65 | if (!checkIsChar(str: s, attrName: "char" , defName, reader)) { |
66 | return false; |
67 | } |
68 | const QChar c = s.at(n: 0); |
69 | const bool dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
70 | |
71 | initRuleData(data&: rule.data.detectChar, args: c, args: dynamic); |
72 | rule.type = Rule::Type::DetectChar; |
73 | } else if (name == QLatin1String("RegExpr" )) { |
74 | const auto pattern = attrs.value(qualifiedName: QLatin1String("String" )); |
75 | if (!checkIsNotEmpty(str: pattern, attrName: "String" , defName, reader)) { |
76 | return false; |
77 | } |
78 | |
79 | const auto isCaseInsensitive = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
80 | const auto isMinimal = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("minimal" ))); |
81 | const auto dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
82 | |
83 | initRuleData(data&: rule.data.regExpr, args: pattern.toString(), args: isCaseInsensitive, args: isMinimal, args: dynamic); |
84 | rule.type = Rule::Type::RegExpr; |
85 | } else if (name == QLatin1String("IncludeRules" )) { |
86 | const auto context = attrs.value(qualifiedName: QLatin1String("context" )); |
87 | if (!checkIsNotEmpty(str: context, attrName: "context" , defName, reader)) { |
88 | return false; |
89 | } |
90 | const bool includeAttribute = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("includeAttrib" ))); |
91 | |
92 | initRuleData(data&: rule.data.includeRules, args: context.toString(), args: includeAttribute); |
93 | rule.type = Rule::Type::IncludeRules; |
94 | isIncludeRules = true; |
95 | } else if (name == QLatin1String("Detect2Chars" )) { |
96 | const auto s1 = attrs.value(qualifiedName: QLatin1String("char" )); |
97 | const auto s2 = attrs.value(qualifiedName: QLatin1String("char1" )); |
98 | if (!checkIsChar(str: s1, attrName: "char" , defName, reader)) { |
99 | return false; |
100 | } |
101 | if (!checkIsChar(str: s2, attrName: "char1" , defName, reader)) { |
102 | return false; |
103 | } |
104 | |
105 | initRuleData(data&: rule.data.detect2Chars, args: s1.at(n: 0), args: s2.at(n: 0)); |
106 | rule.type = Rule::Type::Detect2Chars; |
107 | } else if (name == QLatin1String("keyword" )) { |
108 | const auto s = attrs.value(qualifiedName: QLatin1String("String" )); |
109 | if (!checkIsNotEmpty(str: s, attrName: "String" , defName, reader)) { |
110 | return false; |
111 | } |
112 | Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive; |
113 | bool hasCaseSensitivityOverride = false; |
114 | |
115 | /** |
116 | * we might overwrite the case sensitivity |
117 | * then we need to init the list for lookup of that sensitivity setting |
118 | */ |
119 | if (attrs.hasAttribute(qualifiedName: QLatin1String("insensitive" ))) { |
120 | hasCaseSensitivityOverride = true; |
121 | caseSensitivityOverride = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
122 | } |
123 | |
124 | initRuleData(data&: rule.data.keyword, args: s.toString(), args: loadAdditionalWordDelimiters(reader), args&: caseSensitivityOverride, args&: hasCaseSensitivityOverride); |
125 | rule.type = Rule::Type::Keyword; |
126 | } else if (name == QLatin1String("DetectSpaces" )) { |
127 | rule.type = Rule::Type::DetectSpaces; |
128 | } else if (name == QLatin1String("StringDetect" )) { |
129 | const auto string = attrs.value(qualifiedName: QLatin1String("String" )); |
130 | if (!checkIsNotEmpty(str: string, attrName: "String" , defName, reader)) { |
131 | return false; |
132 | } |
133 | const auto caseSensitivity = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
134 | const auto dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
135 | const bool isSensitive = (caseSensitivity == Qt::CaseSensitive); |
136 | |
137 | // String can be replaced with DetectChar or AnyChar |
138 | if (!dynamic && string.size() == 1) { |
139 | QChar c = string.at(n: 0); |
140 | if (isSensitive || c.toLower() == c.toUpper()) { |
141 | initRuleData(data&: rule.data.detectChar, args&: c, args: dynamic); |
142 | rule.type = Rule::Type::DetectChar; |
143 | } else { |
144 | initRuleData(data&: rule.data.anyChar, args: c.toLower() + c.toUpper()); |
145 | rule.type = Rule::Type::AnyChar; |
146 | } |
147 | } |
148 | // String can be replaced with Detect2Chars |
149 | else if (isSensitive && !dynamic && string.size() == 2) { |
150 | initRuleData(data&: rule.data.detect2Chars, args: string.at(n: 0), args: string.at(n: 1)); |
151 | rule.type = Rule::Type::Detect2Chars; |
152 | } else { |
153 | initRuleData(data&: rule.data.stringDetect, args: string.toString(), args: caseSensitivity, args: dynamic); |
154 | rule.type = Rule::Type::StringDetect; |
155 | } |
156 | } else if (name == QLatin1String("WordDetect" )) { |
157 | const auto word = attrs.value(qualifiedName: QLatin1String("String" )); |
158 | if (!checkIsNotEmpty(str: word, attrName: "String" , defName, reader)) { |
159 | return false; |
160 | } |
161 | const auto caseSensitivity = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
162 | |
163 | initRuleData(data&: rule.data.wordDetect, args: word.toString(), args: loadAdditionalWordDelimiters(reader), args: caseSensitivity); |
164 | rule.type = Rule::Type::WordDetect; |
165 | } else if (name == QLatin1String("AnyChar" )) { |
166 | const auto chars = attrs.value(qualifiedName: QLatin1String("String" )); |
167 | if (!checkIsNotEmpty(str: chars, attrName: "String" , defName, reader)) { |
168 | return false; |
169 | } |
170 | |
171 | // AnyChar can be replaced with DetectChar |
172 | if (chars.size() == 1) { |
173 | initRuleData(data&: rule.data.detectChar, args: chars.at(n: 0), args: false); |
174 | rule.type = Rule::Type::DetectChar; |
175 | } else { |
176 | initRuleData(data&: rule.data.anyChar, args: chars.toString()); |
177 | rule.type = Rule::Type::AnyChar; |
178 | } |
179 | } else if (name == QLatin1String("DetectIdentifier" )) { |
180 | rule.type = Rule::Type::DetectIdentifier; |
181 | } else if (name == QLatin1String("LineContinue" )) { |
182 | const auto s = attrs.value(qualifiedName: QLatin1String("char" )); |
183 | const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(n: 0); |
184 | |
185 | initRuleData(data&: rule.data.lineContinue, args: c); |
186 | rule.type = Rule::Type::LineContinue; |
187 | } else if (name == QLatin1String("Int" )) { |
188 | initRuleData(data&: rule.data.detectInt, args: loadAdditionalWordDelimiters(reader)); |
189 | rule.type = Rule::Type::Int; |
190 | } else if (name == QLatin1String("Float" )) { |
191 | initRuleData(data&: rule.data.detectFloat, args: loadAdditionalWordDelimiters(reader)); |
192 | rule.type = Rule::Type::Float; |
193 | } else if (name == QLatin1String("HlCStringChar" )) { |
194 | rule.type = Rule::Type::HlCStringChar; |
195 | } else if (name == QLatin1String("RangeDetect" )) { |
196 | const auto s1 = attrs.value(qualifiedName: QLatin1String("char" )); |
197 | const auto s2 = attrs.value(qualifiedName: QLatin1String("char1" )); |
198 | if (!checkIsChar(str: s1, attrName: "char" , defName, reader)) { |
199 | return false; |
200 | } |
201 | if (!checkIsChar(str: s2, attrName: "char1" , defName, reader)) { |
202 | return false; |
203 | } |
204 | |
205 | initRuleData(data&: rule.data.rangeDetect, args: s1.at(n: 0), args: s2.at(n: 0)); |
206 | rule.type = Rule::Type::RangeDetect; |
207 | } else if (name == QLatin1String("HlCHex" )) { |
208 | initRuleData(data&: rule.data.hlCHex, args: loadAdditionalWordDelimiters(reader)); |
209 | rule.type = Rule::Type::HlCHex; |
210 | } else if (name == QLatin1String("HlCChar" )) { |
211 | rule.type = Rule::Type::HlCChar; |
212 | } else if (name == QLatin1String("HlCOct" )) { |
213 | initRuleData(data&: rule.data.hlCOct, args: loadAdditionalWordDelimiters(reader)); |
214 | rule.type = Rule::Type::HlCOct; |
215 | } else { |
216 | qCWarning(Log) << "Unknown rule type:" << name; |
217 | return false; |
218 | } |
219 | |
220 | if (!isIncludeRules) { |
221 | rule.common.contextName = attrs.value(qualifiedName: QLatin1String("context" )).toString(); |
222 | rule.common.beginRegionName = attrs.value(qualifiedName: QLatin1String("beginRegion" )).toString(); |
223 | rule.common.endRegionName = attrs.value(qualifiedName: QLatin1String("endRegion" )).toString(); |
224 | rule.common.firstNonSpace = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("firstNonSpace" ))); |
225 | rule.common.lookAhead = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("lookAhead" ))); |
226 | // attribute is only used when lookAhead is false |
227 | if (!rule.common.lookAhead) { |
228 | rule.common.attributeName = attrs.value(qualifiedName: QLatin1String("attribute" )).toString(); |
229 | } |
230 | bool colOk = false; |
231 | rule.common.column = attrs.value(qualifiedName: QLatin1String("column" )).toInt(ok: &colOk); |
232 | if (!colOk) { |
233 | rule.common.column = -1; |
234 | } |
235 | } |
236 | |
237 | return true; |
238 | } |
239 | |
240 | template<class Data1, class Data2, class Visitor> |
241 | static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor) |
242 | { |
243 | using Rule = HighlightingContextData::Rule; |
244 | using Type = Rule::Type; |
245 | switch (type) { |
246 | case Type::AnyChar: |
247 | visitor(data1.anyChar, data2.anyChar); |
248 | break; |
249 | case Type::DetectChar: |
250 | visitor(data1.detectChar, data2.detectChar); |
251 | break; |
252 | case Type::Detect2Chars: |
253 | visitor(data1.detect2Chars, data2.detect2Chars); |
254 | break; |
255 | case Type::HlCOct: |
256 | visitor(data1.hlCOct, data2.hlCOct); |
257 | break; |
258 | case Type::IncludeRules: |
259 | visitor(data1.includeRules, data2.includeRules); |
260 | break; |
261 | case Type::Int: |
262 | visitor(data1.detectInt, data2.detectInt); |
263 | break; |
264 | case Type::Keyword: |
265 | visitor(data1.keyword, data2.keyword); |
266 | break; |
267 | case Type::LineContinue: |
268 | visitor(data1.lineContinue, data2.lineContinue); |
269 | break; |
270 | case Type::RangeDetect: |
271 | visitor(data1.rangeDetect, data2.rangeDetect); |
272 | break; |
273 | case Type::RegExpr: |
274 | visitor(data1.regExpr, data2.regExpr); |
275 | break; |
276 | case Type::StringDetect: |
277 | visitor(data1.stringDetect, data2.stringDetect); |
278 | break; |
279 | case Type::WordDetect: |
280 | visitor(data1.wordDetect, data2.wordDetect); |
281 | break; |
282 | case Type::Float: |
283 | visitor(data1.detectFloat, data2.detectFloat); |
284 | break; |
285 | case Type::HlCHex: |
286 | visitor(data1.hlCHex, data2.hlCHex); |
287 | break; |
288 | |
289 | case Type::HlCStringChar: |
290 | case Type::DetectIdentifier: |
291 | case Type::DetectSpaces: |
292 | case Type::HlCChar: |
293 | case Type::Unknown:; |
294 | } |
295 | } |
296 | |
297 | HighlightingContextData::Rule::Rule() noexcept = default; |
298 | |
299 | HighlightingContextData::Rule::Rule(Rule &&other) noexcept |
300 | : common(std::move(other.common)) |
301 | { |
302 | dataRuleVisit(type: other.type, data1&: data, data2&: other.data, visitor: [](auto &data1, auto &data2) { |
303 | using Data = std::remove_reference_t<decltype(data1)>; |
304 | new (&data1) Data(std::move(data2)); |
305 | }); |
306 | type = other.type; |
307 | } |
308 | |
309 | HighlightingContextData::Rule::Rule(const Rule &other) |
310 | : common(other.common) |
311 | { |
312 | dataRuleVisit(type: other.type, data1&: data, data2: other.data, visitor: [](auto &data1, auto &data2) { |
313 | using Data = std::remove_reference_t<decltype(data1)>; |
314 | new (&data1) Data(data2); |
315 | }); |
316 | type = other.type; |
317 | } |
318 | |
319 | HighlightingContextData::Rule::~Rule() |
320 | { |
321 | dataRuleVisit(type, data1&: data, data2&: data, visitor: [](auto &data, auto &) { |
322 | using Data = std::remove_reference_t<decltype(data)>; |
323 | data.~Data(); |
324 | }); |
325 | } |
326 | |
327 | HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str) |
328 | { |
329 | if (str.isEmpty() || str == QStringLiteral("#stay" )) { |
330 | return; |
331 | } |
332 | |
333 | while (str.startsWith(QStringLiteral("#pop" ))) { |
334 | ++m_popCount; |
335 | if (str.size() > 4 && str.at(n: 4) == QLatin1Char('!')) { |
336 | str = str.mid(pos: 5); |
337 | break; |
338 | } |
339 | str = str.mid(pos: 4); |
340 | } |
341 | |
342 | if (str.isEmpty()) { |
343 | return; |
344 | } |
345 | |
346 | m_contextAndDefName = str.toString(); |
347 | m_defNameIndex = str.indexOf(QStringLiteral("##" )); |
348 | } |
349 | |
350 | bool HighlightingContextData::ContextSwitch::isStay() const |
351 | { |
352 | return m_popCount == -1 && m_contextAndDefName.isEmpty(); |
353 | } |
354 | |
355 | QStringView HighlightingContextData::ContextSwitch::contextName() const |
356 | { |
357 | if (m_defNameIndex == -1) { |
358 | return m_contextAndDefName; |
359 | } |
360 | return QStringView(m_contextAndDefName).left(n: m_defNameIndex); |
361 | } |
362 | |
363 | QStringView HighlightingContextData::ContextSwitch::defName() const |
364 | { |
365 | if (m_defNameIndex == -1) { |
366 | return QStringView(); |
367 | } |
368 | return QStringView(m_contextAndDefName).mid(pos: m_defNameIndex + 2); |
369 | } |
370 | |
371 | void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader) |
372 | { |
373 | Q_ASSERT(reader.name() == QLatin1String("context" )); |
374 | Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); |
375 | |
376 | name = reader.attributes().value(qualifiedName: QLatin1String("name" )).toString(); |
377 | attribute = reader.attributes().value(qualifiedName: QLatin1String("attribute" )).toString(); |
378 | lineEndContext = reader.attributes().value(qualifiedName: QLatin1String("lineEndContext" )).toString(); |
379 | lineEmptyContext = reader.attributes().value(qualifiedName: QLatin1String("lineEmptyContext" )).toString(); |
380 | fallthroughContext = reader.attributes().value(qualifiedName: QLatin1String("fallthroughContext" )).toString(); |
381 | noIndentationBasedFolding = Xml::attrToBool(str: reader.attributes().value(qualifiedName: QLatin1String("noIndentationBasedFolding" ))); |
382 | stopEmptyLineContextSwitchLoop = Xml::attrToBool(str: reader.attributes().value(qualifiedName: QLatin1String("stopEmptyLineContextSwitchLoop" ))); |
383 | |
384 | rules.reserve(n: 8); |
385 | |
386 | reader.readNext(); |
387 | while (!reader.atEnd()) { |
388 | switch (reader.tokenType()) { |
389 | case QXmlStreamReader::StartElement: { |
390 | auto &rule = rules.emplace_back(); |
391 | if (!loadRule(defName, rule, reader)) { |
392 | rules.pop_back(); |
393 | } |
394 | // be done with this rule, skip all subelements, e.g. no longer supported sub-rules |
395 | reader.skipCurrentElement(); |
396 | reader.readNext(); |
397 | break; |
398 | } |
399 | case QXmlStreamReader::EndElement: |
400 | return; |
401 | default: |
402 | reader.readNext(); |
403 | break; |
404 | } |
405 | } |
406 | } |
407 | |