| 1 | /* |
| 2 | SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com> |
| 3 | |
| 4 | SPDX-License-Identifier: MIT |
| 5 | */ |
| 6 | |
#include "highlightingdata_p.hpp"
#include "ksyntaxhighlighting_logging.h"
#include "xml_p.h"

#include <QStringView>
#include <QXmlStreamReader>

#include <utility>
| 13 | |
| 14 | using namespace KSyntaxHighlighting; |
| 15 | |
/**
 * Constructs a rule payload in place.
 *
 * @param data storage to construct into; it is initialized with placement new
 *             and list-initialization, so any value it may already hold is
 *             NOT destroyed first (callers in this file pass a member of
 *             `rule.data` of a freshly created rule).
 * @param args initializers for @p Data, in member order.
 *
 * The arguments are perfectly forwarded: temporaries are moved into the new
 * object, while named variables (lvalues) are copied and left untouched in the
 * caller. Unconditionally moving here would silently steal the contents of a
 * caller's local variable.
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    new (&data) Data{std::forward<Args>(args)...};
}
| 21 | |
| 22 | static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str) |
| 23 | { |
| 24 | return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive; |
| 25 | } |
| 26 | |
| 27 | static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader) |
| 28 | { |
| 29 | return HighlightingContextData::Rule::WordDelimiters{ |
| 30 | .additionalDeliminator: reader.attributes().value(qualifiedName: QLatin1String("additionalDeliminator" )).toString(), |
| 31 | .weakDeliminator: reader.attributes().value(qualifiedName: QLatin1String("weakDeliminator" )).toString(), |
| 32 | }; |
| 33 | } |
| 34 | |
| 35 | static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) |
| 36 | { |
| 37 | if (!str.isEmpty()) { |
| 38 | return true; |
| 39 | } |
| 40 | |
| 41 | qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty" ; |
| 42 | return false; |
| 43 | } |
| 44 | |
| 45 | static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) |
| 46 | { |
| 47 | if (str.size() == 1) { |
| 48 | return true; |
| 49 | } |
| 50 | |
| 51 | qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character" ; |
| 52 | return false; |
| 53 | } |
| 54 | |
| 55 | static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader) |
| 56 | { |
| 57 | using Rule = HighlightingContextData::Rule; |
| 58 | |
| 59 | QStringView name = reader.name(); |
| 60 | const auto attrs = reader.attributes(); |
| 61 | bool isIncludeRules = false; |
| 62 | |
| 63 | if (name == QLatin1String("DetectChar" )) { |
| 64 | const auto s = attrs.value(qualifiedName: QLatin1String("char" )); |
| 65 | if (!checkIsChar(str: s, attrName: "char" , defName, reader)) { |
| 66 | return false; |
| 67 | } |
| 68 | const QChar c = s.at(n: 0); |
| 69 | const bool dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
| 70 | |
| 71 | initRuleData(data&: rule.data.detectChar, args: c, args: dynamic); |
| 72 | rule.type = Rule::Type::DetectChar; |
| 73 | } else if (name == QLatin1String("RegExpr" )) { |
| 74 | const auto pattern = attrs.value(qualifiedName: QLatin1String("String" )); |
| 75 | if (!checkIsNotEmpty(str: pattern, attrName: "String" , defName, reader)) { |
| 76 | return false; |
| 77 | } |
| 78 | |
| 79 | const auto isCaseInsensitive = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
| 80 | const auto isMinimal = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("minimal" ))); |
| 81 | const auto dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
| 82 | |
| 83 | initRuleData(data&: rule.data.regExpr, args: pattern.toString(), args: isCaseInsensitive, args: isMinimal, args: dynamic); |
| 84 | rule.type = Rule::Type::RegExpr; |
| 85 | } else if (name == QLatin1String("IncludeRules" )) { |
| 86 | const auto context = attrs.value(qualifiedName: QLatin1String("context" )); |
| 87 | if (!checkIsNotEmpty(str: context, attrName: "context" , defName, reader)) { |
| 88 | return false; |
| 89 | } |
| 90 | const bool includeAttribute = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("includeAttrib" ))); |
| 91 | |
| 92 | initRuleData(data&: rule.data.includeRules, args: context.toString(), args: includeAttribute); |
| 93 | rule.type = Rule::Type::IncludeRules; |
| 94 | isIncludeRules = true; |
| 95 | } else if (name == QLatin1String("Detect2Chars" )) { |
| 96 | const auto s1 = attrs.value(qualifiedName: QLatin1String("char" )); |
| 97 | const auto s2 = attrs.value(qualifiedName: QLatin1String("char1" )); |
| 98 | if (!checkIsChar(str: s1, attrName: "char" , defName, reader)) { |
| 99 | return false; |
| 100 | } |
| 101 | if (!checkIsChar(str: s2, attrName: "char1" , defName, reader)) { |
| 102 | return false; |
| 103 | } |
| 104 | |
| 105 | initRuleData(data&: rule.data.detect2Chars, args: s1.at(n: 0), args: s2.at(n: 0)); |
| 106 | rule.type = Rule::Type::Detect2Chars; |
| 107 | } else if (name == QLatin1String("keyword" )) { |
| 108 | const auto s = attrs.value(qualifiedName: QLatin1String("String" )); |
| 109 | if (!checkIsNotEmpty(str: s, attrName: "String" , defName, reader)) { |
| 110 | return false; |
| 111 | } |
| 112 | Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive; |
| 113 | bool hasCaseSensitivityOverride = false; |
| 114 | |
| 115 | /** |
| 116 | * we might overwrite the case sensitivity |
| 117 | * then we need to init the list for lookup of that sensitivity setting |
| 118 | */ |
| 119 | if (attrs.hasAttribute(qualifiedName: QLatin1String("insensitive" ))) { |
| 120 | hasCaseSensitivityOverride = true; |
| 121 | caseSensitivityOverride = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
| 122 | } |
| 123 | |
| 124 | initRuleData(data&: rule.data.keyword, args: s.toString(), args: loadAdditionalWordDelimiters(reader), args&: caseSensitivityOverride, args&: hasCaseSensitivityOverride); |
| 125 | rule.type = Rule::Type::Keyword; |
| 126 | } else if (name == QLatin1String("DetectSpaces" )) { |
| 127 | rule.type = Rule::Type::DetectSpaces; |
| 128 | } else if (name == QLatin1String("StringDetect" )) { |
| 129 | const auto string = attrs.value(qualifiedName: QLatin1String("String" )); |
| 130 | if (!checkIsNotEmpty(str: string, attrName: "String" , defName, reader)) { |
| 131 | return false; |
| 132 | } |
| 133 | const auto caseSensitivity = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
| 134 | const auto dynamic = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("dynamic" ))); |
| 135 | const bool isSensitive = (caseSensitivity == Qt::CaseSensitive); |
| 136 | |
| 137 | // String can be replaced with DetectChar or AnyChar |
| 138 | if (!dynamic && string.size() == 1) { |
| 139 | QChar c = string.at(n: 0); |
| 140 | if (isSensitive || c.toLower() == c.toUpper()) { |
| 141 | initRuleData(data&: rule.data.detectChar, args&: c, args: dynamic); |
| 142 | rule.type = Rule::Type::DetectChar; |
| 143 | } else { |
| 144 | initRuleData(data&: rule.data.anyChar, args: c.toLower() + c.toUpper()); |
| 145 | rule.type = Rule::Type::AnyChar; |
| 146 | } |
| 147 | } |
| 148 | // String can be replaced with Detect2Chars |
| 149 | else if (isSensitive && !dynamic && string.size() == 2) { |
| 150 | initRuleData(data&: rule.data.detect2Chars, args: string.at(n: 0), args: string.at(n: 1)); |
| 151 | rule.type = Rule::Type::Detect2Chars; |
| 152 | } else { |
| 153 | initRuleData(data&: rule.data.stringDetect, args: string.toString(), args: caseSensitivity, args: dynamic); |
| 154 | rule.type = Rule::Type::StringDetect; |
| 155 | } |
| 156 | } else if (name == QLatin1String("WordDetect" )) { |
| 157 | const auto word = attrs.value(qualifiedName: QLatin1String("String" )); |
| 158 | if (!checkIsNotEmpty(str: word, attrName: "String" , defName, reader)) { |
| 159 | return false; |
| 160 | } |
| 161 | const auto caseSensitivity = attrToCaseSensitivity(str: attrs.value(qualifiedName: QLatin1String("insensitive" ))); |
| 162 | |
| 163 | initRuleData(data&: rule.data.wordDetect, args: word.toString(), args: loadAdditionalWordDelimiters(reader), args: caseSensitivity); |
| 164 | rule.type = Rule::Type::WordDetect; |
| 165 | } else if (name == QLatin1String("AnyChar" )) { |
| 166 | const auto chars = attrs.value(qualifiedName: QLatin1String("String" )); |
| 167 | if (!checkIsNotEmpty(str: chars, attrName: "String" , defName, reader)) { |
| 168 | return false; |
| 169 | } |
| 170 | |
| 171 | // AnyChar can be replaced with DetectChar |
| 172 | if (chars.size() == 1) { |
| 173 | initRuleData(data&: rule.data.detectChar, args: chars.at(n: 0), args: false); |
| 174 | rule.type = Rule::Type::DetectChar; |
| 175 | } else { |
| 176 | initRuleData(data&: rule.data.anyChar, args: chars.toString()); |
| 177 | rule.type = Rule::Type::AnyChar; |
| 178 | } |
| 179 | } else if (name == QLatin1String("DetectIdentifier" )) { |
| 180 | rule.type = Rule::Type::DetectIdentifier; |
| 181 | } else if (name == QLatin1String("LineContinue" )) { |
| 182 | const auto s = attrs.value(qualifiedName: QLatin1String("char" )); |
| 183 | const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(n: 0); |
| 184 | |
| 185 | initRuleData(data&: rule.data.lineContinue, args: c); |
| 186 | rule.type = Rule::Type::LineContinue; |
| 187 | } else if (name == QLatin1String("Int" )) { |
| 188 | initRuleData(data&: rule.data.detectInt, args: loadAdditionalWordDelimiters(reader)); |
| 189 | rule.type = Rule::Type::Int; |
| 190 | } else if (name == QLatin1String("Float" )) { |
| 191 | initRuleData(data&: rule.data.detectFloat, args: loadAdditionalWordDelimiters(reader)); |
| 192 | rule.type = Rule::Type::Float; |
| 193 | } else if (name == QLatin1String("HlCStringChar" )) { |
| 194 | rule.type = Rule::Type::HlCStringChar; |
| 195 | } else if (name == QLatin1String("RangeDetect" )) { |
| 196 | const auto s1 = attrs.value(qualifiedName: QLatin1String("char" )); |
| 197 | const auto s2 = attrs.value(qualifiedName: QLatin1String("char1" )); |
| 198 | if (!checkIsChar(str: s1, attrName: "char" , defName, reader)) { |
| 199 | return false; |
| 200 | } |
| 201 | if (!checkIsChar(str: s2, attrName: "char1" , defName, reader)) { |
| 202 | return false; |
| 203 | } |
| 204 | |
| 205 | initRuleData(data&: rule.data.rangeDetect, args: s1.at(n: 0), args: s2.at(n: 0)); |
| 206 | rule.type = Rule::Type::RangeDetect; |
| 207 | } else if (name == QLatin1String("HlCHex" )) { |
| 208 | initRuleData(data&: rule.data.hlCHex, args: loadAdditionalWordDelimiters(reader)); |
| 209 | rule.type = Rule::Type::HlCHex; |
| 210 | } else if (name == QLatin1String("HlCChar" )) { |
| 211 | rule.type = Rule::Type::HlCChar; |
| 212 | } else if (name == QLatin1String("HlCOct" )) { |
| 213 | initRuleData(data&: rule.data.hlCOct, args: loadAdditionalWordDelimiters(reader)); |
| 214 | rule.type = Rule::Type::HlCOct; |
| 215 | } else { |
| 216 | qCWarning(Log) << "Unknown rule type:" << name; |
| 217 | return false; |
| 218 | } |
| 219 | |
| 220 | if (!isIncludeRules) { |
| 221 | rule.common.contextName = attrs.value(qualifiedName: QLatin1String("context" )).toString(); |
| 222 | rule.common.beginRegionName = attrs.value(qualifiedName: QLatin1String("beginRegion" )).toString(); |
| 223 | rule.common.endRegionName = attrs.value(qualifiedName: QLatin1String("endRegion" )).toString(); |
| 224 | rule.common.firstNonSpace = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("firstNonSpace" ))); |
| 225 | rule.common.lookAhead = Xml::attrToBool(str: attrs.value(qualifiedName: QLatin1String("lookAhead" ))); |
| 226 | // attribute is only used when lookAhead is false |
| 227 | if (!rule.common.lookAhead) { |
| 228 | rule.common.attributeName = attrs.value(qualifiedName: QLatin1String("attribute" )).toString(); |
| 229 | } |
| 230 | bool colOk = false; |
| 231 | rule.common.column = attrs.value(qualifiedName: QLatin1String("column" )).toInt(ok: &colOk); |
| 232 | if (!colOk) { |
| 233 | rule.common.column = -1; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | return true; |
| 238 | } |
| 239 | |
| 240 | template<class Data1, class Data2, class Visitor> |
| 241 | static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor) |
| 242 | { |
| 243 | using Rule = HighlightingContextData::Rule; |
| 244 | using Type = Rule::Type; |
| 245 | switch (type) { |
| 246 | case Type::AnyChar: |
| 247 | visitor(data1.anyChar, data2.anyChar); |
| 248 | break; |
| 249 | case Type::DetectChar: |
| 250 | visitor(data1.detectChar, data2.detectChar); |
| 251 | break; |
| 252 | case Type::Detect2Chars: |
| 253 | visitor(data1.detect2Chars, data2.detect2Chars); |
| 254 | break; |
| 255 | case Type::HlCOct: |
| 256 | visitor(data1.hlCOct, data2.hlCOct); |
| 257 | break; |
| 258 | case Type::IncludeRules: |
| 259 | visitor(data1.includeRules, data2.includeRules); |
| 260 | break; |
| 261 | case Type::Int: |
| 262 | visitor(data1.detectInt, data2.detectInt); |
| 263 | break; |
| 264 | case Type::Keyword: |
| 265 | visitor(data1.keyword, data2.keyword); |
| 266 | break; |
| 267 | case Type::LineContinue: |
| 268 | visitor(data1.lineContinue, data2.lineContinue); |
| 269 | break; |
| 270 | case Type::RangeDetect: |
| 271 | visitor(data1.rangeDetect, data2.rangeDetect); |
| 272 | break; |
| 273 | case Type::RegExpr: |
| 274 | visitor(data1.regExpr, data2.regExpr); |
| 275 | break; |
| 276 | case Type::StringDetect: |
| 277 | visitor(data1.stringDetect, data2.stringDetect); |
| 278 | break; |
| 279 | case Type::WordDetect: |
| 280 | visitor(data1.wordDetect, data2.wordDetect); |
| 281 | break; |
| 282 | case Type::Float: |
| 283 | visitor(data1.detectFloat, data2.detectFloat); |
| 284 | break; |
| 285 | case Type::HlCHex: |
| 286 | visitor(data1.hlCHex, data2.hlCHex); |
| 287 | break; |
| 288 | |
| 289 | case Type::HlCStringChar: |
| 290 | case Type::DetectIdentifier: |
| 291 | case Type::DetectSpaces: |
| 292 | case Type::HlCChar: |
| 293 | case Type::Unknown:; |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | HighlightingContextData::Rule::Rule() noexcept = default; |
| 298 | |
| 299 | HighlightingContextData::Rule::Rule(Rule &&other) noexcept |
| 300 | : common(std::move(other.common)) |
| 301 | { |
| 302 | dataRuleVisit(type: other.type, data1&: data, data2&: other.data, visitor: [](auto &data1, auto &data2) { |
| 303 | using Data = std::remove_reference_t<decltype(data1)>; |
| 304 | new (&data1) Data(std::move(data2)); |
| 305 | }); |
| 306 | type = other.type; |
| 307 | } |
| 308 | |
| 309 | HighlightingContextData::Rule::Rule(const Rule &other) |
| 310 | : common(other.common) |
| 311 | { |
| 312 | dataRuleVisit(type: other.type, data1&: data, data2: other.data, visitor: [](auto &data1, auto &data2) { |
| 313 | using Data = std::remove_reference_t<decltype(data1)>; |
| 314 | new (&data1) Data(data2); |
| 315 | }); |
| 316 | type = other.type; |
| 317 | } |
| 318 | |
| 319 | HighlightingContextData::Rule::~Rule() |
| 320 | { |
| 321 | dataRuleVisit(type, data1&: data, data2&: data, visitor: [](auto &data, auto &) { |
| 322 | using Data = std::remove_reference_t<decltype(data)>; |
| 323 | data.~Data(); |
| 324 | }); |
| 325 | } |
| 326 | |
| 327 | void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader) |
| 328 | { |
| 329 | Q_ASSERT(reader.name() == QLatin1String("context" )); |
| 330 | Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); |
| 331 | |
| 332 | name = reader.attributes().value(qualifiedName: QLatin1String("name" )).toString(); |
| 333 | attribute = reader.attributes().value(qualifiedName: QLatin1String("attribute" )).toString(); |
| 334 | lineEndContext = reader.attributes().value(qualifiedName: QLatin1String("lineEndContext" )).toString(); |
| 335 | lineEmptyContext = reader.attributes().value(qualifiedName: QLatin1String("lineEmptyContext" )).toString(); |
| 336 | fallthroughContext = reader.attributes().value(qualifiedName: QLatin1String("fallthroughContext" )).toString(); |
| 337 | noIndentationBasedFolding = Xml::attrToBool(str: reader.attributes().value(qualifiedName: QLatin1String("noIndentationBasedFolding" ))); |
| 338 | stopEmptyLineContextSwitchLoop = Xml::attrToBool(str: reader.attributes().value(qualifiedName: QLatin1String("stopEmptyLineContextSwitchLoop" ))); |
| 339 | |
| 340 | rules.reserve(n: 8); |
| 341 | |
| 342 | reader.readNext(); |
| 343 | while (!reader.atEnd()) { |
| 344 | switch (reader.tokenType()) { |
| 345 | case QXmlStreamReader::StartElement: { |
| 346 | auto &rule = rules.emplace_back(); |
| 347 | if (!loadRule(defName, rule, reader)) { |
| 348 | rules.pop_back(); |
| 349 | } |
| 350 | // be done with this rule, skip all subelements, e.g. no longer supported sub-rules |
| 351 | reader.skipCurrentElement(); |
| 352 | reader.readNext(); |
| 353 | break; |
| 354 | } |
| 355 | case QXmlStreamReader::EndElement: |
| 356 | return; |
| 357 | default: |
| 358 | reader.readNext(); |
| 359 | break; |
| 360 | } |
| 361 | } |
| 362 | } |
| 363 | |