| 1 | /* |
| 2 | SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de> |
| 3 | SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org> |
| 4 | |
| 5 | SPDX-License-Identifier: LGPL-2.0-or-later |
| 6 | */ |
| 7 | |
// BEGIN includes
#include "kateregexpsearch.h"

#include "katepartdebug.h" // for LOG_KTE

#include <ktexteditor/document.h>

#include <limits>
// END includes
| 15 | |
| 16 | // Turn debug messages on/off here |
| 17 | // #define FAST_DEBUG_ENABLE |
| 18 | |
// FAST_DEBUG(x) streams x to the LOG_KTE debug category when FAST_DEBUG_ENABLE
// is defined. Otherwise it expands to nothing, so x is not even evaluated.
#if defined(FAST_DEBUG_ENABLE)
#define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
#else
#define FAST_DEBUG(x)
#endif
| 24 | |
| 25 | class KateRegExpSearch::ReplacementStream |
| 26 | { |
| 27 | public: |
| 28 | struct counter { |
| 29 | counter(int value, int minWidth) |
| 30 | : value(value) |
| 31 | , minWidth(minWidth) |
| 32 | { |
| 33 | } |
| 34 | |
| 35 | const int value; |
| 36 | const int minWidth; |
| 37 | }; |
| 38 | |
| 39 | struct cap { |
| 40 | cap(int n) |
| 41 | : n(n) |
| 42 | { |
| 43 | } |
| 44 | |
| 45 | const int n; |
| 46 | }; |
| 47 | |
| 48 | enum CaseConversion { |
| 49 | upperCase, ///< \U ... uppercase from now on |
| 50 | upperCaseFirst, ///< \u ... uppercase the first letter |
| 51 | lowerCase, ///< \L ... lowercase from now on |
| 52 | lowerCaseFirst, ///< \l ... lowercase the first letter |
| 53 | keepCase ///< \E ... back to original case |
| 54 | }; |
| 55 | |
| 56 | public: |
| 57 | ReplacementStream(const QStringList &capturedTexts); |
| 58 | |
| 59 | QString str() const |
| 60 | { |
| 61 | return m_str; |
| 62 | } |
| 63 | |
| 64 | ReplacementStream &operator<<(const QString &); |
| 65 | ReplacementStream &operator<<(const counter &); |
| 66 | ReplacementStream &operator<<(const cap &); |
| 67 | ReplacementStream &operator<<(CaseConversion); |
| 68 | |
| 69 | private: |
| 70 | const QStringList m_capturedTexts; |
| 71 | CaseConversion m_caseConversion; |
| 72 | QString m_str; |
| 73 | }; |
| 74 | |
| 75 | KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts) |
| 76 | : m_capturedTexts(capturedTexts) |
| 77 | , m_caseConversion(keepCase) |
| 78 | { |
| 79 | } |
| 80 | |
| 81 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str) |
| 82 | { |
| 83 | switch (m_caseConversion) { |
| 84 | case upperCase: |
| 85 | // Copy as uppercase |
| 86 | m_str.append(s: str.toUpper()); |
| 87 | break; |
| 88 | |
| 89 | case upperCaseFirst: |
| 90 | if (str.length() > 0) { |
| 91 | m_str.append(c: str.at(i: 0).toUpper()); |
| 92 | m_str.append(v: QStringView(str).mid(pos: 1)); |
| 93 | m_caseConversion = keepCase; |
| 94 | } |
| 95 | break; |
| 96 | |
| 97 | case lowerCase: |
| 98 | // Copy as lowercase |
| 99 | m_str.append(s: str.toLower()); |
| 100 | break; |
| 101 | |
| 102 | case lowerCaseFirst: |
| 103 | if (str.length() > 0) { |
| 104 | m_str.append(c: str.at(i: 0).toLower()); |
| 105 | m_str.append(v: QStringView(str).mid(pos: 1)); |
| 106 | m_caseConversion = keepCase; |
| 107 | } |
| 108 | break; |
| 109 | |
| 110 | case keepCase: // FALLTHROUGH |
| 111 | default: |
| 112 | // Copy unmodified |
| 113 | m_str.append(s: str); |
| 114 | break; |
| 115 | } |
| 116 | |
| 117 | return *this; |
| 118 | } |
| 119 | |
| 120 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c) |
| 121 | { |
| 122 | // Zero padded counter value |
| 123 | m_str.append(QStringLiteral("%1" ).arg(a: c.value, fieldWidth: c.minWidth, base: 10, fillChar: QLatin1Char('0'))); |
| 124 | |
| 125 | return *this; |
| 126 | } |
| 127 | |
| 128 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap) |
| 129 | { |
| 130 | if (0 <= cap.n && cap.n < m_capturedTexts.size()) { |
| 131 | (*this) << m_capturedTexts[cap.n]; |
| 132 | } else { |
| 133 | // Insert just the number to be consistent with QRegExp ("\c" becomes "c") |
| 134 | m_str.append(s: QString::number(cap.n)); |
| 135 | } |
| 136 | |
| 137 | return *this; |
| 138 | } |
| 139 | |
| 140 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion) |
| 141 | { |
| 142 | m_caseConversion = caseConversion; |
| 143 | |
| 144 | return *this; |
| 145 | } |
| 146 | |
| 147 | // BEGIN d'tor, c'tor |
| 148 | // |
// KateRegExpSearch constructor
| 150 | // |
| 151 | KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document) |
| 152 | : m_document(document) |
| 153 | { |
| 154 | } |
| 155 | |
| 156 | // helper structs for captures re-construction |
// One position seen in two ways: as an offset into the joined search text
// (index) and as a document position (line / col).
// All members default to -1 so a cursor that was never filled in is
// recognizable instead of holding indeterminate values.
struct TwoViewCursor {
    int index = -1;
    int line = -1;
    int col = -1;
};
| 162 | |
// Start and end offset of one capture group inside the searched text.
// -1 in both members marks a group that did not take part in the match,
// which is also the default state.
struct IndexPair {
    int openIndex = -1;
    int closeIndex = -1;
};
| 167 | |
| 168 | QList<KTextEditor::Range> |
| 169 | KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options) |
| 170 | { |
| 171 | // Save regexes to avoid reconstructing regexes all the time |
| 172 | static QRegularExpression preRegex; |
| 173 | static QRegularExpression repairedRegex; |
| 174 | |
| 175 | // Returned if no matches are found |
| 176 | QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid()); |
| 177 | |
| 178 | // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the |
| 179 | // this method returning here if 'pattern' is empty. |
| 180 | if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) { |
| 181 | return noResult; |
| 182 | } |
| 183 | |
| 184 | // Always enable Unicode support |
| 185 | options |= QRegularExpression::UseUnicodePropertiesOption; |
| 186 | |
| 187 | if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) { |
| 188 | preRegex = QRegularExpression(pattern, options); |
| 189 | } |
| 190 | |
| 191 | // If repairPattern() is called on an invalid regex pattern it may cause asserts |
| 192 | // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern |
| 193 | // expects at least one character after a '\') |
| 194 | if (!preRegex.isValid()) { |
| 195 | return noResult; |
| 196 | } |
| 197 | |
| 198 | // detect pattern type (single- or mutli-line) |
| 199 | bool stillMultiLine; |
| 200 | const QString repairedPattern = repairPattern(pattern, stillMultiLine); |
| 201 | |
| 202 | // Enable multiline mode, so that the ^ and $ metacharacters in the pattern |
| 203 | // are allowed to match, respectively, immediately after and immediately |
| 204 | // before any newline in the subject string, as well as at the very beginning |
| 205 | // and at the very end of the subject string (see QRegularExpression docs). |
| 206 | // |
| 207 | // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange |
| 208 | // ends in the middle of a line, then a '$' won't match at that position. And |
| 209 | // matches that are out of the inputRange are rejected. |
| 210 | if (stillMultiLine) { |
| 211 | options |= QRegularExpression::MultilineOption; |
| 212 | } |
| 213 | |
| 214 | // check if anything changed at all |
| 215 | if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) { |
| 216 | repairedRegex.setPattern(repairedPattern); |
| 217 | repairedRegex.setPatternOptions(options); |
| 218 | } |
| 219 | if (!repairedRegex.isValid()) { |
| 220 | return noResult; |
| 221 | } |
| 222 | |
| 223 | const int rangeStartLine = inputRange.start().line(); |
| 224 | const int rangeStartCol = inputRange.start().column(); |
| 225 | |
| 226 | const int rangeEndLine = inputRange.end().line(); |
| 227 | const int rangeEndCol = inputRange.end().column(); |
| 228 | |
| 229 | if (stillMultiLine) { |
| 230 | const int rangeLineCount = rangeEndLine - rangeStartLine + 1; |
| 231 | FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")" ); |
| 232 | |
| 233 | const int docLineCount = m_document->lines(); |
| 234 | // nothing to do... |
| 235 | if (rangeStartLine >= docLineCount) { |
| 236 | return noResult; |
| 237 | } |
| 238 | |
| 239 | QList<int> lineLens(rangeLineCount); |
| 240 | int maxMatchOffset = 0; |
| 241 | |
| 242 | // all lines in the input range |
| 243 | QString wholeRange; |
| 244 | for (int i = 0; i < rangeLineCount; ++i) { |
| 245 | const int docLineIndex = rangeStartLine + i; |
| 246 | if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index |
| 247 | return noResult; |
| 248 | } |
| 249 | |
| 250 | const QString textLine = m_document->line(line: docLineIndex); |
| 251 | lineLens[i] = textLine.length(); |
| 252 | wholeRange.append(s: textLine); |
| 253 | |
| 254 | // This check is needed as some parts in vimode rely on this behaviour. |
| 255 | // We add an '\n' as a delimiter between lines in the range; but never after the |
| 256 | // last line as that would add an '\n' that isn't there in the original text, |
| 257 | // and can skew search results or hit an assert when accessing lineLens later |
| 258 | // in the code. |
| 259 | if (i != (rangeLineCount - 1)) { |
| 260 | wholeRange.append(c: QLatin1Char('\n')); |
| 261 | } |
| 262 | |
| 263 | // lineLens.at(i) + 1, because '\n' was added |
| 264 | maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1; |
| 265 | |
| 266 | FAST_DEBUG(" line" << i << "has length" << lineLens.at(i)); |
| 267 | } |
| 268 | |
| 269 | FAST_DEBUG("Max. match offset" << maxMatchOffset); |
| 270 | |
| 271 | QRegularExpressionMatch match; |
| 272 | bool found = false; |
| 273 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: wholeRange, offset: rangeStartCol); |
| 274 | |
| 275 | if (backwards) { |
| 276 | while (iter.hasNext()) { |
| 277 | QRegularExpressionMatch curMatch = iter.next(); |
| 278 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
| 279 | match.swap(other&: curMatch); |
| 280 | found = true; |
| 281 | } |
| 282 | } |
| 283 | } else { /* forwards */ |
| 284 | QRegularExpressionMatch curMatch; |
| 285 | if (iter.hasNext()) { |
| 286 | curMatch = iter.next(); |
| 287 | } |
| 288 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
| 289 | match.swap(other&: curMatch); |
| 290 | found = true; |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | if (!found) { |
| 295 | // no match |
| 296 | FAST_DEBUG("not found" ); |
| 297 | return noResult; |
| 298 | } |
| 299 | |
| 300 | // Capture groups: save opening and closing indices and build a map, |
| 301 | // the correct values will be written into it later |
| 302 | QMap<int, TwoViewCursor *> indicesToCursors; |
| 303 | const int numCaptures = repairedRegex.captureCount(); |
| 304 | QList<IndexPair> indexPairs(numCaptures + 1); |
| 305 | for (int c = 0; c <= numCaptures; ++c) { |
| 306 | const int openIndex = match.capturedStart(nth: c); |
| 307 | IndexPair &pair = indexPairs[c]; |
| 308 | if (openIndex == -1) { |
| 309 | // An invalid index indicates an empty capture group |
| 310 | pair.openIndex = -1; |
| 311 | pair.closeIndex = -1; |
| 312 | FAST_DEBUG("capture []" ); |
| 313 | } else { |
| 314 | const int closeIndex = match.capturedEnd(nth: c); |
| 315 | pair.openIndex = openIndex; |
| 316 | pair.closeIndex = closeIndex; |
| 317 | FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]" ); |
| 318 | |
| 319 | // each key no more than once |
| 320 | if (!indicesToCursors.contains(key: openIndex)) { |
| 321 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
| 322 | twoViewCursor->index = openIndex; |
| 323 | indicesToCursors.insert(key: openIndex, value: twoViewCursor); |
| 324 | FAST_DEBUG(" capture group start index added: " << openIndex); |
| 325 | } |
| 326 | if (!indicesToCursors.contains(key: closeIndex)) { |
| 327 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
| 328 | twoViewCursor->index = closeIndex; |
| 329 | indicesToCursors.insert(key: closeIndex, value: twoViewCursor); |
| 330 | FAST_DEBUG(" capture group end index added: " << closeIndex); |
| 331 | } |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | // find out where they belong |
| 336 | int curRelLine = 0; |
| 337 | int curRelCol = 0; |
| 338 | int curRelIndex = 0; |
| 339 | |
| 340 | for (TwoViewCursor *twoViewCursor : std::as_const(t&: indicesToCursors)) { |
| 341 | // forward to index, save line/col |
| 342 | const int index = twoViewCursor->index; |
| 343 | FAST_DEBUG("resolving position" << index); |
| 344 | |
| 345 | while (curRelIndex <= index) { |
| 346 | FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); |
| 347 | |
| 348 | const int curRelLineLen = lineLens.at(i: curRelLine); |
| 349 | const int curLineRemainder = curRelLineLen - curRelCol; |
| 350 | const int lineFeedIndex = curRelIndex + curLineRemainder; |
| 351 | if (index <= lineFeedIndex) { |
| 352 | if (index == lineFeedIndex) { |
| 353 | // on this line _at_ line feed |
| 354 | FAST_DEBUG(" on line feed" ); |
| 355 | const int absLine = curRelLine + rangeStartLine; |
| 356 | twoViewCursor->line = absLine; |
| 357 | twoViewCursor->col = curRelLineLen; |
| 358 | |
| 359 | // advance to next line |
| 360 | const int advance = (index - curRelIndex) + 1; |
| 361 | ++curRelLine; |
| 362 | curRelCol = 0; |
| 363 | curRelIndex += advance; |
| 364 | } else { // index < lineFeedIndex |
| 365 | // on this line _before_ line feed |
| 366 | FAST_DEBUG(" before line feed" ); |
| 367 | const int diff = (index - curRelIndex); |
| 368 | const int absLine = curRelLine + rangeStartLine; |
| 369 | const int absCol = curRelCol + diff; |
| 370 | twoViewCursor->line = absLine; |
| 371 | twoViewCursor->col = absCol; |
| 372 | |
| 373 | // advance on same line |
| 374 | const int advance = diff + 1; |
| 375 | curRelCol += advance; |
| 376 | curRelIndex += advance; |
| 377 | } |
| 378 | FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")" ); |
| 379 | } else { // if (index > lineFeedIndex) |
| 380 | // not on this line |
| 381 | // advance to next line |
| 382 | FAST_DEBUG(" not on this line" ); |
| 383 | ++curRelLine; |
| 384 | curRelCol = 0; |
| 385 | const int advance = curLineRemainder + 1; |
| 386 | curRelIndex += advance; |
| 387 | } |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | // build result array |
| 392 | QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid()); |
| 393 | for (int y = 0; y <= numCaptures; y++) { |
| 394 | IndexPair &pair = indexPairs[y]; |
| 395 | if (!(pair.openIndex == -1 || pair.closeIndex == -1)) { |
| 396 | const TwoViewCursor *const openCursors = indicesToCursors.value(key: pair.openIndex); |
| 397 | const TwoViewCursor *const closeCursors = indicesToCursors.value(key: pair.closeIndex); |
| 398 | const int startLine = openCursors->line; |
| 399 | const int startCol = openCursors->col; |
| 400 | const int endLine = closeCursors->line; |
| 401 | const int endCol = closeCursors->col; |
| 402 | FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")" ); |
| 403 | result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | // free structs allocated for indicesToCursors |
| 408 | qDeleteAll(c: indicesToCursors); |
| 409 | |
| 410 | return result; |
| 411 | } else { |
| 412 | // single-line regex search (forwards and backwards) |
| 413 | const int rangeStartCol = inputRange.start().column(); |
| 414 | const uint rangeEndCol = inputRange.end().column(); |
| 415 | |
| 416 | const int rangeStartLine = inputRange.start().line(); |
| 417 | const int rangeEndLine = inputRange.end().line(); |
| 418 | |
| 419 | const int forInit = backwards ? rangeEndLine : rangeStartLine; |
| 420 | |
| 421 | const int forInc = backwards ? -1 : +1; |
| 422 | |
| 423 | FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine)); |
| 424 | |
| 425 | for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) { |
| 426 | if (j < 0 || m_document->lines() <= j) { |
| 427 | FAST_DEBUG("searchText | line " << j << ": no" ); |
| 428 | return noResult; |
| 429 | } |
| 430 | |
| 431 | const QString textLine = m_document->line(line: j); |
| 432 | |
| 433 | const int offset = (j == rangeStartLine) ? rangeStartCol : 0; |
| 434 | const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length(); |
| 435 | |
| 436 | bool found = false; |
| 437 | |
| 438 | QRegularExpressionMatch match; |
| 439 | |
| 440 | if (backwards) { |
| 441 | // we can use globalMatchView as textLine is a const local above |
| 442 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatchView(subjectView: textLine, offset); |
| 443 | while (iter.hasNext()) { |
| 444 | QRegularExpressionMatch curMatch = iter.next(); |
| 445 | if (curMatch.capturedEnd() <= endLineMaxOffset) { |
| 446 | match.swap(other&: curMatch); |
| 447 | found = true; |
| 448 | } |
| 449 | } |
| 450 | } else { |
| 451 | // we can use matchView as textLine is a const local above |
| 452 | match = repairedRegex.matchView(subjectView: textLine, offset); |
| 453 | if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) { |
| 454 | found = true; |
| 455 | } |
| 456 | } |
| 457 | |
| 458 | if (found) { |
| 459 | FAST_DEBUG("line " << j << ": yes" ); |
| 460 | |
| 461 | // build result array |
| 462 | const int numCaptures = repairedRegex.captureCount(); |
| 463 | QList<KTextEditor::Range> result(numCaptures + 1); |
| 464 | result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd()); |
| 465 | |
| 466 | FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart() << ")..(" << j << ", " << match.capturedEnd() << ")" ); |
| 467 | |
| 468 | for (int y = 1; y <= numCaptures; ++y) { |
| 469 | const int openIndex = match.capturedStart(nth: y); |
| 470 | |
| 471 | if (openIndex == -1) { |
| 472 | result[y] = KTextEditor::Range::invalid(); |
| 473 | |
| 474 | FAST_DEBUG("capture []" ); |
| 475 | } else { |
| 476 | const int closeIndex = match.capturedEnd(nth: y); |
| 477 | |
| 478 | FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")" ); |
| 479 | |
| 480 | result[y] = KTextEditor::Range(j, openIndex, j, closeIndex); |
| 481 | } |
| 482 | } |
| 483 | return result; |
| 484 | } else { |
| 485 | FAST_DEBUG("searchText | line " << j << ": no" ); |
| 486 | } |
| 487 | } |
| 488 | } |
| 489 | return noResult; |
| 490 | } |
| 491 | |
| 492 | /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text) |
| 493 | { |
| 494 | return buildReplacement(text, capturedTexts: QStringList(), replacementCounter: 0, replacementGoodies: false); |
| 495 | } |
| 496 | |
| 497 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter) |
| 498 | { |
| 499 | return buildReplacement(text, capturedTexts, replacementCounter, replacementGoodies: true); |
| 500 | } |
| 501 | |
| 502 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies) |
| 503 | { |
| 504 | // get input |
| 505 | const int inputLen = text.length(); |
| 506 | int input = 0; // walker index |
| 507 | |
| 508 | // prepare output |
| 509 | ReplacementStream out(capturedTexts); |
| 510 | |
| 511 | while (input < inputLen) { |
| 512 | switch (text[input].unicode()) { |
| 513 | case L'\n': |
| 514 | out << text[input]; |
| 515 | input++; |
| 516 | break; |
| 517 | |
| 518 | case L'\\': |
| 519 | if (input + 1 >= inputLen) { |
| 520 | // copy backslash |
| 521 | out << text[input]; |
| 522 | input++; |
| 523 | break; |
| 524 | } |
| 525 | |
| 526 | switch (text[input + 1].unicode()) { |
| 527 | case L'0': // "\0000".."\0377" |
| 528 | if (input + 4 >= inputLen) { |
| 529 | out << ReplacementStream::cap(0); |
| 530 | input += 2; |
| 531 | } else { |
| 532 | bool stripAndSkip = false; |
| 533 | const ushort text_2 = text[input + 2].unicode(); |
| 534 | if ((text_2 >= L'0') && (text_2 <= L'3')) { |
| 535 | const ushort text_3 = text[input + 3].unicode(); |
| 536 | if ((text_3 >= L'0') && (text_3 <= L'7')) { |
| 537 | const ushort text_4 = text[input + 4].unicode(); |
| 538 | if ((text_4 >= L'0') && (text_4 <= L'7')) { |
| 539 | int digits[3]; |
| 540 | for (int i = 0; i < 3; i++) { |
| 541 | digits[i] = 7 - (L'7' - text[input + 2 + i].unicode()); |
| 542 | } |
| 543 | const int ch = 64 * digits[0] + 8 * digits[1] + digits[2]; |
| 544 | out << QChar(ch); |
| 545 | input += 5; |
| 546 | } else { |
| 547 | stripAndSkip = true; |
| 548 | } |
| 549 | } else { |
| 550 | stripAndSkip = true; |
| 551 | } |
| 552 | } else { |
| 553 | stripAndSkip = true; |
| 554 | } |
| 555 | |
| 556 | if (stripAndSkip) { |
| 557 | out << ReplacementStream::cap(0); |
| 558 | input += 2; |
| 559 | } |
| 560 | } |
| 561 | break; |
| 562 | |
| 563 | // single letter captures \x |
| 564 | case L'1': |
| 565 | case L'2': |
| 566 | case L'3': |
| 567 | case L'4': |
| 568 | case L'5': |
| 569 | case L'6': |
| 570 | case L'7': |
| 571 | case L'8': |
| 572 | case L'9': |
| 573 | out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode())); |
| 574 | input += 2; |
| 575 | break; |
| 576 | |
| 577 | // multi letter captures \{xxxx} |
| 578 | case L'{': { |
| 579 | // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124 |
| 580 | int capture = 0; |
| 581 | int captureSize = 2; |
| 582 | while ((input + captureSize) < inputLen) { |
| 583 | const ushort nextDigit = text[input + captureSize].unicode(); |
| 584 | if ((nextDigit >= L'0') && (nextDigit <= L'9')) { |
| 585 | capture = (10 * capture) + (9 - (L'9' - nextDigit)); |
| 586 | ++captureSize; |
| 587 | continue; |
| 588 | } |
| 589 | if (nextDigit == L'}') { |
| 590 | ++captureSize; |
| 591 | break; |
| 592 | } |
| 593 | break; |
| 594 | } |
| 595 | out << ReplacementStream::cap(capture); |
| 596 | input += captureSize; |
| 597 | break; |
| 598 | } |
| 599 | |
| 600 | case L'E': // FALLTHROUGH |
| 601 | case L'L': // FALLTHROUGH |
| 602 | case L'l': // FALLTHROUGH |
| 603 | case L'U': // FALLTHROUGH |
| 604 | case L'u': |
| 605 | if (!replacementGoodies) { |
| 606 | // strip backslash ("\?" -> "?") |
| 607 | out << text[input + 1]; |
| 608 | } else { |
| 609 | // handle case switcher |
| 610 | switch (text[input + 1].unicode()) { |
| 611 | case L'L': |
| 612 | out << ReplacementStream::lowerCase; |
| 613 | break; |
| 614 | |
| 615 | case L'l': |
| 616 | out << ReplacementStream::lowerCaseFirst; |
| 617 | break; |
| 618 | |
| 619 | case L'U': |
| 620 | out << ReplacementStream::upperCase; |
| 621 | break; |
| 622 | |
| 623 | case L'u': |
| 624 | out << ReplacementStream::upperCaseFirst; |
| 625 | break; |
| 626 | |
| 627 | case L'E': // FALLTHROUGH |
| 628 | default: |
| 629 | out << ReplacementStream::keepCase; |
| 630 | } |
| 631 | } |
| 632 | input += 2; |
| 633 | break; |
| 634 | |
| 635 | case L'#': |
| 636 | if (!replacementGoodies) { |
| 637 | // strip backslash ("\?" -> "?") |
| 638 | out << text[input + 1]; |
| 639 | input += 2; |
| 640 | } else { |
| 641 | // handle replacement counter |
| 642 | // eat and count all following hash marks |
| 643 | // each hash stands for a leading zero: \### will produces 001, 002, ... |
| 644 | int minWidth = 1; |
| 645 | while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) { |
| 646 | minWidth++; |
| 647 | } |
| 648 | out << ReplacementStream::counter(replacementCounter, minWidth); |
| 649 | input += 1 + minWidth; |
| 650 | } |
| 651 | break; |
| 652 | |
| 653 | case L'a': |
| 654 | out << QChar(0x07); |
| 655 | input += 2; |
| 656 | break; |
| 657 | |
| 658 | case L'f': |
| 659 | out << QChar(0x0c); |
| 660 | input += 2; |
| 661 | break; |
| 662 | |
| 663 | case L'n': |
| 664 | out << QChar(0x0a); |
| 665 | input += 2; |
| 666 | break; |
| 667 | |
| 668 | case L'r': |
| 669 | out << QChar(0x0d); |
| 670 | input += 2; |
| 671 | break; |
| 672 | |
| 673 | case L't': |
| 674 | out << QChar(0x09); |
| 675 | input += 2; |
| 676 | break; |
| 677 | |
| 678 | case L'v': |
| 679 | out << QChar(0x0b); |
| 680 | input += 2; |
| 681 | break; |
| 682 | |
| 683 | case L'x': // "\x0000".."\xffff" |
| 684 | if (input + 5 >= inputLen) { |
| 685 | // strip backslash ("\x" -> "x") |
| 686 | out << text[input + 1]; |
| 687 | input += 2; |
| 688 | } else { |
| 689 | bool stripAndSkip = false; |
| 690 | const ushort text_2 = text[input + 2].unicode(); |
| 691 | if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) { |
| 692 | const ushort text_3 = text[input + 3].unicode(); |
| 693 | if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) { |
| 694 | const ushort text_4 = text[input + 4].unicode(); |
| 695 | if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) { |
| 696 | const ushort text_5 = text[input + 5].unicode(); |
| 697 | if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f')) |
| 698 | || ((text_5 >= L'A') && (text_5 <= L'F'))) { |
| 699 | int digits[4]; |
| 700 | for (int i = 0; i < 4; i++) { |
| 701 | const ushort cur = text[input + 2 + i].unicode(); |
| 702 | if ((cur >= L'0') && (cur <= L'9')) { |
| 703 | digits[i] = 9 - (L'9' - cur); |
| 704 | } else if ((cur >= L'a') && (cur <= L'f')) { |
| 705 | digits[i] = 15 - (L'f' - cur); |
| 706 | } else { // if ((cur >= L'A') && (cur <= L'F'))) |
| 707 | digits[i] = 15 - (L'F' - cur); |
| 708 | } |
| 709 | } |
| 710 | |
| 711 | const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3]; |
| 712 | out << QChar(ch); |
| 713 | input += 6; |
| 714 | } else { |
| 715 | stripAndSkip = true; |
| 716 | } |
| 717 | } else { |
| 718 | stripAndSkip = true; |
| 719 | } |
| 720 | } else { |
| 721 | stripAndSkip = true; |
| 722 | } |
| 723 | } |
| 724 | |
| 725 | if (stripAndSkip) { |
| 726 | // strip backslash ("\x" -> "x") |
| 727 | out << text[input + 1]; |
| 728 | input += 2; |
| 729 | } |
| 730 | } |
| 731 | break; |
| 732 | |
| 733 | default: |
| 734 | // strip backslash ("\?" -> "?") |
| 735 | out << text[input + 1]; |
| 736 | input += 2; |
| 737 | } |
| 738 | break; |
| 739 | |
| 740 | default: |
| 741 | out << text[input]; |
| 742 | input++; |
| 743 | } |
| 744 | } |
| 745 | |
| 746 | return out.str(); |
| 747 | } |
| 748 | |
| 749 | QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine) |
| 750 | { |
| 751 | // '\s' can make a pattern multi-line, it's replaced here with '[ \t]'; |
| 752 | // besides \s, the following characters can make a pattern multi-line: |
| 753 | // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0??? |
| 754 | // a multi-line pattern must not pass as single-line, the other |
| 755 | // way around will just result in slower searches and is therefore |
| 756 | // not as critical |
| 757 | |
| 758 | const int inputLen = pattern.length(); |
| 759 | const QStringView patternView{pattern}; |
| 760 | |
| 761 | // prepare output |
| 762 | QString output; |
| 763 | output.reserve(asize: 2 * inputLen + 1); // twice should be enough for the average case |
| 764 | |
| 765 | // parser state |
| 766 | bool insideClass = false; |
| 767 | |
| 768 | stillMultiLine = false; |
| 769 | int input = 0; |
| 770 | while (input < inputLen) { |
| 771 | if (insideClass) { |
| 772 | // wait for closing, unescaped ']' |
| 773 | switch (pattern[input].unicode()) { |
| 774 | case L'\\': |
| 775 | switch (pattern[input + 1].unicode()) { |
| 776 | case L'x': |
| 777 | if (input + 5 < inputLen) { |
| 778 | // copy "\x????" unmodified |
| 779 | output.append(v: patternView.mid(pos: input, n: 6)); |
| 780 | input += 6; |
| 781 | } else { |
| 782 | // copy "\x" unmodified |
| 783 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 784 | input += 2; |
| 785 | } |
| 786 | stillMultiLine = true; |
| 787 | break; |
| 788 | |
| 789 | case L'0': |
| 790 | if (input + 4 < inputLen) { |
| 791 | // copy "\0???" unmodified |
| 792 | output.append(v: patternView.mid(pos: input, n: 5)); |
| 793 | input += 5; |
| 794 | } else { |
| 795 | // copy "\0" unmodified |
| 796 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 797 | input += 2; |
| 798 | } |
| 799 | stillMultiLine = true; |
| 800 | break; |
| 801 | |
| 802 | case L's': |
| 803 | // replace "\s" with "[ \t]" |
| 804 | output.append(s: QLatin1String(" \\t" )); |
| 805 | input += 2; |
| 806 | break; |
| 807 | |
| 808 | case L'n': |
| 809 | stillMultiLine = true; |
| 810 | // FALLTROUGH |
| 811 | Q_FALLTHROUGH(); |
| 812 | |
| 813 | default: |
| 814 | // copy "\?" unmodified |
| 815 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 816 | input += 2; |
| 817 | } |
| 818 | break; |
| 819 | |
| 820 | case L']': |
| 821 | // copy "]" unmodified |
| 822 | insideClass = false; |
| 823 | output.append(c: pattern[input]); |
| 824 | ++input; |
| 825 | break; |
| 826 | |
| 827 | default: |
| 828 | // copy "?" unmodified |
| 829 | output.append(c: pattern[input]); |
| 830 | ++input; |
| 831 | } |
| 832 | } else { |
| 833 | switch (pattern[input].unicode()) { |
| 834 | case L'\\': |
| 835 | switch (pattern[input + 1].unicode()) { |
| 836 | case L'x': |
| 837 | if (input + 5 < inputLen) { |
| 838 | // copy "\x????" unmodified |
| 839 | output.append(v: patternView.mid(pos: input, n: 6)); |
| 840 | input += 6; |
| 841 | } else { |
| 842 | // copy "\x" unmodified |
| 843 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 844 | input += 2; |
| 845 | } |
| 846 | stillMultiLine = true; |
| 847 | break; |
| 848 | |
| 849 | case L'0': |
| 850 | if (input + 4 < inputLen) { |
| 851 | // copy "\0???" unmodified |
| 852 | output.append(v: patternView.mid(pos: input, n: 5)); |
| 853 | input += 5; |
| 854 | } else { |
| 855 | // copy "\0" unmodified |
| 856 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 857 | input += 2; |
| 858 | } |
| 859 | stillMultiLine = true; |
| 860 | break; |
| 861 | |
| 862 | case L's': |
| 863 | // replace "\s" with "[ \t]" |
| 864 | output.append(s: QLatin1String("[ \\t]" )); |
| 865 | input += 2; |
| 866 | break; |
| 867 | |
| 868 | case L'n': |
| 869 | stillMultiLine = true; |
| 870 | // FALLTROUGH |
| 871 | Q_FALLTHROUGH(); |
| 872 | default: |
| 873 | // copy "\?" unmodified |
| 874 | output.append(v: patternView.mid(pos: input, n: 2)); |
| 875 | input += 2; |
| 876 | } |
| 877 | break; |
| 878 | |
| 879 | case L'[': |
| 880 | // copy "[" unmodified |
| 881 | insideClass = true; |
| 882 | output.append(c: pattern[input]); |
| 883 | ++input; |
| 884 | break; |
| 885 | |
| 886 | default: |
| 887 | // copy "?" unmodified |
| 888 | output.append(c: pattern[input]); |
| 889 | ++input; |
| 890 | } |
| 891 | } |
| 892 | } |
| 893 | return output; |
| 894 | } |
| 895 | |
| 896 | // Kill our helpers again |
| 897 | #ifdef FAST_DEBUG_ENABLE |
| 898 | #undef FAST_DEBUG_ENABLE |
| 899 | #endif |
| 900 | #undef FAST_DEBUG |
| 901 | |