1/*
2 SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de>
3 SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
// BEGIN includes
#include "kateregexpsearch.h"

#include "katepartdebug.h" // for LOG_KTE

#include <ktexteditor/document.h>

#include <limits>
// END includes
15
16// Turn debug messages on/off here
17// #define FAST_DEBUG_ENABLE
18
// FAST_DEBUG(x) streams x to the LOG_KTE debug category when FAST_DEBUG_ENABLE
// is defined. Otherwise it expands to nothing, so x is not even evaluated.
#if defined(FAST_DEBUG_ENABLE)
#define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
#else
#define FAST_DEBUG(x)
#endif
24
25class KateRegExpSearch::ReplacementStream
26{
27public:
28 struct counter {
29 counter(int value, int minWidth)
30 : value(value)
31 , minWidth(minWidth)
32 {
33 }
34
35 const int value;
36 const int minWidth;
37 };
38
39 struct cap {
40 cap(int n)
41 : n(n)
42 {
43 }
44
45 const int n;
46 };
47
48 enum CaseConversion {
49 upperCase, ///< \U ... uppercase from now on
50 upperCaseFirst, ///< \u ... uppercase the first letter
51 lowerCase, ///< \L ... lowercase from now on
52 lowerCaseFirst, ///< \l ... lowercase the first letter
53 keepCase ///< \E ... back to original case
54 };
55
56public:
57 ReplacementStream(const QStringList &capturedTexts);
58
59 QString str() const
60 {
61 return m_str;
62 }
63
64 ReplacementStream &operator<<(const QString &);
65 ReplacementStream &operator<<(const counter &);
66 ReplacementStream &operator<<(const cap &);
67 ReplacementStream &operator<<(CaseConversion);
68
69private:
70 const QStringList m_capturedTexts;
71 CaseConversion m_caseConversion;
72 QString m_str;
73};
74
75KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts)
76 : m_capturedTexts(capturedTexts)
77 , m_caseConversion(keepCase)
78{
79}
80
81KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str)
82{
83 switch (m_caseConversion) {
84 case upperCase:
85 // Copy as uppercase
86 m_str.append(s: str.toUpper());
87 break;
88
89 case upperCaseFirst:
90 if (str.length() > 0) {
91 m_str.append(c: str.at(i: 0).toUpper());
92 m_str.append(v: QStringView(str).mid(pos: 1));
93 m_caseConversion = keepCase;
94 }
95 break;
96
97 case lowerCase:
98 // Copy as lowercase
99 m_str.append(s: str.toLower());
100 break;
101
102 case lowerCaseFirst:
103 if (str.length() > 0) {
104 m_str.append(c: str.at(i: 0).toLower());
105 m_str.append(v: QStringView(str).mid(pos: 1));
106 m_caseConversion = keepCase;
107 }
108 break;
109
110 case keepCase: // FALLTHROUGH
111 default:
112 // Copy unmodified
113 m_str.append(s: str);
114 break;
115 }
116
117 return *this;
118}
119
120KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c)
121{
122 // Zero padded counter value
123 m_str.append(QStringLiteral("%1").arg(a: c.value, fieldWidth: c.minWidth, base: 10, fillChar: QLatin1Char('0')));
124
125 return *this;
126}
127
128KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap)
129{
130 if (0 <= cap.n && cap.n < m_capturedTexts.size()) {
131 (*this) << m_capturedTexts[cap.n];
132 } else {
133 // Insert just the number to be consistent with QRegExp ("\c" becomes "c")
134 m_str.append(s: QString::number(cap.n));
135 }
136
137 return *this;
138}
139
140KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion)
141{
142 m_caseConversion = caseConversion;
143
144 return *this;
145}
146
147// BEGIN d'tor, c'tor
148//
// KateRegExpSearch constructor
150//
151KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document)
152 : m_document(document)
153{
154}
155
156// helper structs for captures re-construction
// One position seen in two coordinate systems: as an offset into the text of
// all searched lines joined by '\n' (index) and as a document position
// (line, col). Members are zero-initialized so that a freshly created cursor
// never carries indeterminate values.
struct TwoViewCursor {
    int index = 0; // offset into the joined search text
    int line = 0; // document line the offset falls on
    int col = 0; // column within that line
};
162
// Start and end offset of one capture group within the joined search text.
// -1 in both members marks a capture group that did not take part in the
// match; this is also the default state.
struct IndexPair {
    int openIndex = -1; // offset of the first character of the capture
    int closeIndex = -1; // offset one past the last character of the capture
};
167
168QList<KTextEditor::Range>
169KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options)
170{
171 // Save regexes to avoid reconstructing regexes all the time
172 static QRegularExpression preRegex;
173 static QRegularExpression repairedRegex;
174
175 // Returned if no matches are found
176 QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid());
177
178 // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the
179 // this method returning here if 'pattern' is empty.
180 if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) {
181 return noResult;
182 }
183
184 // Always enable Unicode support
185 options |= QRegularExpression::UseUnicodePropertiesOption;
186
187 if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) {
188 preRegex = QRegularExpression(pattern, options);
189 }
190
191 // If repairPattern() is called on an invalid regex pattern it may cause asserts
192 // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern
193 // expects at least one character after a '\')
194 if (!preRegex.isValid()) {
195 return noResult;
196 }
197
198 // detect pattern type (single- or mutli-line)
199 bool stillMultiLine;
200 const QString repairedPattern = repairPattern(pattern, stillMultiLine);
201
202 // Enable multiline mode, so that the ^ and $ metacharacters in the pattern
203 // are allowed to match, respectively, immediately after and immediately
204 // before any newline in the subject string, as well as at the very beginning
205 // and at the very end of the subject string (see QRegularExpression docs).
206 //
207 // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange
208 // ends in the middle of a line, then a '$' won't match at that position. And
209 // matches that are out of the inputRange are rejected.
210 if (stillMultiLine) {
211 options |= QRegularExpression::MultilineOption;
212 }
213
214 // check if anything changed at all
215 if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) {
216 repairedRegex.setPattern(repairedPattern);
217 repairedRegex.setPatternOptions(options);
218 }
219 if (!repairedRegex.isValid()) {
220 return noResult;
221 }
222
223 const int rangeStartLine = inputRange.start().line();
224 const int rangeStartCol = inputRange.start().column();
225
226 const int rangeEndLine = inputRange.end().line();
227 const int rangeEndCol = inputRange.end().column();
228
229 if (stillMultiLine) {
230 const int rangeLineCount = rangeEndLine - rangeStartLine + 1;
231 FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")");
232
233 const int docLineCount = m_document->lines();
234 // nothing to do...
235 if (rangeStartLine >= docLineCount) {
236 return noResult;
237 }
238
239 QList<int> lineLens(rangeLineCount);
240 int maxMatchOffset = 0;
241
242 // all lines in the input range
243 QString wholeRange;
244 for (int i = 0; i < rangeLineCount; ++i) {
245 const int docLineIndex = rangeStartLine + i;
246 if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index
247 return noResult;
248 }
249
250 const QString textLine = m_document->line(line: docLineIndex);
251 lineLens[i] = textLine.length();
252 wholeRange.append(s: textLine);
253
254 // This check is needed as some parts in vimode rely on this behaviour.
255 // We add an '\n' as a delimiter between lines in the range; but never after the
256 // last line as that would add an '\n' that isn't there in the original text,
257 // and can skew search results or hit an assert when accessing lineLens later
258 // in the code.
259 if (i != (rangeLineCount - 1)) {
260 wholeRange.append(c: QLatin1Char('\n'));
261 }
262
263 // lineLens.at(i) + 1, because '\n' was added
264 maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1;
265
266 FAST_DEBUG(" line" << i << "has length" << lineLens.at(i));
267 }
268
269 FAST_DEBUG("Max. match offset" << maxMatchOffset);
270
271 QRegularExpressionMatch match;
272 bool found = false;
273 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: wholeRange, offset: rangeStartCol);
274
275 if (backwards) {
276 while (iter.hasNext()) {
277 QRegularExpressionMatch curMatch = iter.next();
278 if (curMatch.capturedEnd() <= maxMatchOffset) {
279 match.swap(other&: curMatch);
280 found = true;
281 }
282 }
283 } else { /* forwards */
284 QRegularExpressionMatch curMatch;
285 if (iter.hasNext()) {
286 curMatch = iter.next();
287 }
288 if (curMatch.capturedEnd() <= maxMatchOffset) {
289 match.swap(other&: curMatch);
290 found = true;
291 }
292 }
293
294 if (!found) {
295 // no match
296 FAST_DEBUG("not found");
297 return noResult;
298 }
299
300 // Capture groups: save opening and closing indices and build a map,
301 // the correct values will be written into it later
302 QMap<int, TwoViewCursor *> indicesToCursors;
303 const int numCaptures = repairedRegex.captureCount();
304 QList<IndexPair> indexPairs(numCaptures + 1);
305 for (int c = 0; c <= numCaptures; ++c) {
306 const int openIndex = match.capturedStart(nth: c);
307 IndexPair &pair = indexPairs[c];
308 if (openIndex == -1) {
309 // An invalid index indicates an empty capture group
310 pair.openIndex = -1;
311 pair.closeIndex = -1;
312 FAST_DEBUG("capture []");
313 } else {
314 const int closeIndex = match.capturedEnd(nth: c);
315 pair.openIndex = openIndex;
316 pair.closeIndex = closeIndex;
317 FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]");
318
319 // each key no more than once
320 if (!indicesToCursors.contains(key: openIndex)) {
321 TwoViewCursor *twoViewCursor = new TwoViewCursor;
322 twoViewCursor->index = openIndex;
323 indicesToCursors.insert(key: openIndex, value: twoViewCursor);
324 FAST_DEBUG(" capture group start index added: " << openIndex);
325 }
326 if (!indicesToCursors.contains(key: closeIndex)) {
327 TwoViewCursor *twoViewCursor = new TwoViewCursor;
328 twoViewCursor->index = closeIndex;
329 indicesToCursors.insert(key: closeIndex, value: twoViewCursor);
330 FAST_DEBUG(" capture group end index added: " << closeIndex);
331 }
332 }
333 }
334
335 // find out where they belong
336 int curRelLine = 0;
337 int curRelCol = 0;
338 int curRelIndex = 0;
339
340 for (TwoViewCursor *twoViewCursor : std::as_const(t&: indicesToCursors)) {
341 // forward to index, save line/col
342 const int index = twoViewCursor->index;
343 FAST_DEBUG("resolving position" << index);
344
345 while (curRelIndex <= index) {
346 FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex);
347
348 const int curRelLineLen = lineLens.at(i: curRelLine);
349 const int curLineRemainder = curRelLineLen - curRelCol;
350 const int lineFeedIndex = curRelIndex + curLineRemainder;
351 if (index <= lineFeedIndex) {
352 if (index == lineFeedIndex) {
353 // on this line _at_ line feed
354 FAST_DEBUG(" on line feed");
355 const int absLine = curRelLine + rangeStartLine;
356 twoViewCursor->line = absLine;
357 twoViewCursor->col = curRelLineLen;
358
359 // advance to next line
360 const int advance = (index - curRelIndex) + 1;
361 ++curRelLine;
362 curRelCol = 0;
363 curRelIndex += advance;
364 } else { // index < lineFeedIndex
365 // on this line _before_ line feed
366 FAST_DEBUG(" before line feed");
367 const int diff = (index - curRelIndex);
368 const int absLine = curRelLine + rangeStartLine;
369 const int absCol = curRelCol + diff;
370 twoViewCursor->line = absLine;
371 twoViewCursor->col = absCol;
372
373 // advance on same line
374 const int advance = diff + 1;
375 curRelCol += advance;
376 curRelIndex += advance;
377 }
378 FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")");
379 } else { // if (index > lineFeedIndex)
380 // not on this line
381 // advance to next line
382 FAST_DEBUG(" not on this line");
383 ++curRelLine;
384 curRelCol = 0;
385 const int advance = curLineRemainder + 1;
386 curRelIndex += advance;
387 }
388 }
389 }
390
391 // build result array
392 QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid());
393 for (int y = 0; y <= numCaptures; y++) {
394 IndexPair &pair = indexPairs[y];
395 if (!(pair.openIndex == -1 || pair.closeIndex == -1)) {
396 const TwoViewCursor *const openCursors = indicesToCursors.value(key: pair.openIndex);
397 const TwoViewCursor *const closeCursors = indicesToCursors.value(key: pair.closeIndex);
398 const int startLine = openCursors->line;
399 const int startCol = openCursors->col;
400 const int endLine = closeCursors->line;
401 const int endCol = closeCursors->col;
402 FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")");
403 result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol);
404 }
405 }
406
407 // free structs allocated for indicesToCursors
408 qDeleteAll(c: indicesToCursors);
409
410 return result;
411 } else {
412 // single-line regex search (forwards and backwards)
413 const int rangeStartCol = inputRange.start().column();
414 const uint rangeEndCol = inputRange.end().column();
415
416 const int rangeStartLine = inputRange.start().line();
417 const int rangeEndLine = inputRange.end().line();
418
419 const int forInit = backwards ? rangeEndLine : rangeStartLine;
420
421 const int forInc = backwards ? -1 : +1;
422
423 FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine));
424
425 for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) {
426 if (j < 0 || m_document->lines() <= j) {
427 FAST_DEBUG("searchText | line " << j << ": no");
428 return noResult;
429 }
430
431 const QString textLine = m_document->line(line: j);
432
433 const int offset = (j == rangeStartLine) ? rangeStartCol : 0;
434 const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length();
435
436 bool found = false;
437
438 QRegularExpressionMatch match;
439
440 if (backwards) {
441 // we can use globalMatchView as textLine is a const local above
442 QRegularExpressionMatchIterator iter = repairedRegex.globalMatchView(subjectView: textLine, offset);
443 while (iter.hasNext()) {
444 QRegularExpressionMatch curMatch = iter.next();
445 if (curMatch.capturedEnd() <= endLineMaxOffset) {
446 match.swap(other&: curMatch);
447 found = true;
448 }
449 }
450 } else {
451 // we can use matchView as textLine is a const local above
452 match = repairedRegex.matchView(subjectView: textLine, offset);
453 if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) {
454 found = true;
455 }
456 }
457
458 if (found) {
459 FAST_DEBUG("line " << j << ": yes");
460
461 // build result array
462 const int numCaptures = repairedRegex.captureCount();
463 QList<KTextEditor::Range> result(numCaptures + 1);
464 result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd());
465
466 FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart() << ")..(" << j << ", " << match.capturedEnd() << ")");
467
468 for (int y = 1; y <= numCaptures; ++y) {
469 const int openIndex = match.capturedStart(nth: y);
470
471 if (openIndex == -1) {
472 result[y] = KTextEditor::Range::invalid();
473
474 FAST_DEBUG("capture []");
475 } else {
476 const int closeIndex = match.capturedEnd(nth: y);
477
478 FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")");
479
480 result[y] = KTextEditor::Range(j, openIndex, j, closeIndex);
481 }
482 }
483 return result;
484 } else {
485 FAST_DEBUG("searchText | line " << j << ": no");
486 }
487 }
488 }
489 return noResult;
490}
491
492/*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text)
493{
494 return buildReplacement(text, capturedTexts: QStringList(), replacementCounter: 0, replacementGoodies: false);
495}
496
497/*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter)
498{
499 return buildReplacement(text, capturedTexts, replacementCounter, replacementGoodies: true);
500}
501
502/*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies)
503{
504 // get input
505 const int inputLen = text.length();
506 int input = 0; // walker index
507
508 // prepare output
509 ReplacementStream out(capturedTexts);
510
511 while (input < inputLen) {
512 switch (text[input].unicode()) {
513 case L'\n':
514 out << text[input];
515 input++;
516 break;
517
518 case L'\\':
519 if (input + 1 >= inputLen) {
520 // copy backslash
521 out << text[input];
522 input++;
523 break;
524 }
525
526 switch (text[input + 1].unicode()) {
527 case L'0': // "\0000".."\0377"
528 if (input + 4 >= inputLen) {
529 out << ReplacementStream::cap(0);
530 input += 2;
531 } else {
532 bool stripAndSkip = false;
533 const ushort text_2 = text[input + 2].unicode();
534 if ((text_2 >= L'0') && (text_2 <= L'3')) {
535 const ushort text_3 = text[input + 3].unicode();
536 if ((text_3 >= L'0') && (text_3 <= L'7')) {
537 const ushort text_4 = text[input + 4].unicode();
538 if ((text_4 >= L'0') && (text_4 <= L'7')) {
539 int digits[3];
540 for (int i = 0; i < 3; i++) {
541 digits[i] = 7 - (L'7' - text[input + 2 + i].unicode());
542 }
543 const int ch = 64 * digits[0] + 8 * digits[1] + digits[2];
544 out << QChar(ch);
545 input += 5;
546 } else {
547 stripAndSkip = true;
548 }
549 } else {
550 stripAndSkip = true;
551 }
552 } else {
553 stripAndSkip = true;
554 }
555
556 if (stripAndSkip) {
557 out << ReplacementStream::cap(0);
558 input += 2;
559 }
560 }
561 break;
562
563 // single letter captures \x
564 case L'1':
565 case L'2':
566 case L'3':
567 case L'4':
568 case L'5':
569 case L'6':
570 case L'7':
571 case L'8':
572 case L'9':
573 out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode()));
574 input += 2;
575 break;
576
577 // multi letter captures \{xxxx}
578 case L'{': {
579 // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124
580 int capture = 0;
581 int captureSize = 2;
582 while ((input + captureSize) < inputLen) {
583 const ushort nextDigit = text[input + captureSize].unicode();
584 if ((nextDigit >= L'0') && (nextDigit <= L'9')) {
585 capture = (10 * capture) + (9 - (L'9' - nextDigit));
586 ++captureSize;
587 continue;
588 }
589 if (nextDigit == L'}') {
590 ++captureSize;
591 break;
592 }
593 break;
594 }
595 out << ReplacementStream::cap(capture);
596 input += captureSize;
597 break;
598 }
599
600 case L'E': // FALLTHROUGH
601 case L'L': // FALLTHROUGH
602 case L'l': // FALLTHROUGH
603 case L'U': // FALLTHROUGH
604 case L'u':
605 if (!replacementGoodies) {
606 // strip backslash ("\?" -> "?")
607 out << text[input + 1];
608 } else {
609 // handle case switcher
610 switch (text[input + 1].unicode()) {
611 case L'L':
612 out << ReplacementStream::lowerCase;
613 break;
614
615 case L'l':
616 out << ReplacementStream::lowerCaseFirst;
617 break;
618
619 case L'U':
620 out << ReplacementStream::upperCase;
621 break;
622
623 case L'u':
624 out << ReplacementStream::upperCaseFirst;
625 break;
626
627 case L'E': // FALLTHROUGH
628 default:
629 out << ReplacementStream::keepCase;
630 }
631 }
632 input += 2;
633 break;
634
635 case L'#':
636 if (!replacementGoodies) {
637 // strip backslash ("\?" -> "?")
638 out << text[input + 1];
639 input += 2;
640 } else {
641 // handle replacement counter
642 // eat and count all following hash marks
643 // each hash stands for a leading zero: \### will produces 001, 002, ...
644 int minWidth = 1;
645 while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) {
646 minWidth++;
647 }
648 out << ReplacementStream::counter(replacementCounter, minWidth);
649 input += 1 + minWidth;
650 }
651 break;
652
653 case L'a':
654 out << QChar(0x07);
655 input += 2;
656 break;
657
658 case L'f':
659 out << QChar(0x0c);
660 input += 2;
661 break;
662
663 case L'n':
664 out << QChar(0x0a);
665 input += 2;
666 break;
667
668 case L'r':
669 out << QChar(0x0d);
670 input += 2;
671 break;
672
673 case L't':
674 out << QChar(0x09);
675 input += 2;
676 break;
677
678 case L'v':
679 out << QChar(0x0b);
680 input += 2;
681 break;
682
683 case L'x': // "\x0000".."\xffff"
684 if (input + 5 >= inputLen) {
685 // strip backslash ("\x" -> "x")
686 out << text[input + 1];
687 input += 2;
688 } else {
689 bool stripAndSkip = false;
690 const ushort text_2 = text[input + 2].unicode();
691 if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) {
692 const ushort text_3 = text[input + 3].unicode();
693 if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) {
694 const ushort text_4 = text[input + 4].unicode();
695 if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) {
696 const ushort text_5 = text[input + 5].unicode();
697 if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f'))
698 || ((text_5 >= L'A') && (text_5 <= L'F'))) {
699 int digits[4];
700 for (int i = 0; i < 4; i++) {
701 const ushort cur = text[input + 2 + i].unicode();
702 if ((cur >= L'0') && (cur <= L'9')) {
703 digits[i] = 9 - (L'9' - cur);
704 } else if ((cur >= L'a') && (cur <= L'f')) {
705 digits[i] = 15 - (L'f' - cur);
706 } else { // if ((cur >= L'A') && (cur <= L'F')))
707 digits[i] = 15 - (L'F' - cur);
708 }
709 }
710
711 const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3];
712 out << QChar(ch);
713 input += 6;
714 } else {
715 stripAndSkip = true;
716 }
717 } else {
718 stripAndSkip = true;
719 }
720 } else {
721 stripAndSkip = true;
722 }
723 }
724
725 if (stripAndSkip) {
726 // strip backslash ("\x" -> "x")
727 out << text[input + 1];
728 input += 2;
729 }
730 }
731 break;
732
733 default:
734 // strip backslash ("\?" -> "?")
735 out << text[input + 1];
736 input += 2;
737 }
738 break;
739
740 default:
741 out << text[input];
742 input++;
743 }
744 }
745
746 return out.str();
747}
748
749QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine)
750{
751 // '\s' can make a pattern multi-line, it's replaced here with '[ \t]';
752 // besides \s, the following characters can make a pattern multi-line:
753 // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0???
754 // a multi-line pattern must not pass as single-line, the other
755 // way around will just result in slower searches and is therefore
756 // not as critical
757
758 const int inputLen = pattern.length();
759 const QStringView patternView{pattern};
760
761 // prepare output
762 QString output;
763 output.reserve(asize: 2 * inputLen + 1); // twice should be enough for the average case
764
765 // parser state
766 bool insideClass = false;
767
768 stillMultiLine = false;
769 int input = 0;
770 while (input < inputLen) {
771 if (insideClass) {
772 // wait for closing, unescaped ']'
773 switch (pattern[input].unicode()) {
774 case L'\\':
775 switch (pattern[input + 1].unicode()) {
776 case L'x':
777 if (input + 5 < inputLen) {
778 // copy "\x????" unmodified
779 output.append(v: patternView.mid(pos: input, n: 6));
780 input += 6;
781 } else {
782 // copy "\x" unmodified
783 output.append(v: patternView.mid(pos: input, n: 2));
784 input += 2;
785 }
786 stillMultiLine = true;
787 break;
788
789 case L'0':
790 if (input + 4 < inputLen) {
791 // copy "\0???" unmodified
792 output.append(v: patternView.mid(pos: input, n: 5));
793 input += 5;
794 } else {
795 // copy "\0" unmodified
796 output.append(v: patternView.mid(pos: input, n: 2));
797 input += 2;
798 }
799 stillMultiLine = true;
800 break;
801
802 case L's':
803 // replace "\s" with "[ \t]"
804 output.append(s: QLatin1String(" \\t"));
805 input += 2;
806 break;
807
808 case L'n':
809 stillMultiLine = true;
810 // FALLTROUGH
811 Q_FALLTHROUGH();
812
813 default:
814 // copy "\?" unmodified
815 output.append(v: patternView.mid(pos: input, n: 2));
816 input += 2;
817 }
818 break;
819
820 case L']':
821 // copy "]" unmodified
822 insideClass = false;
823 output.append(c: pattern[input]);
824 ++input;
825 break;
826
827 default:
828 // copy "?" unmodified
829 output.append(c: pattern[input]);
830 ++input;
831 }
832 } else {
833 switch (pattern[input].unicode()) {
834 case L'\\':
835 switch (pattern[input + 1].unicode()) {
836 case L'x':
837 if (input + 5 < inputLen) {
838 // copy "\x????" unmodified
839 output.append(v: patternView.mid(pos: input, n: 6));
840 input += 6;
841 } else {
842 // copy "\x" unmodified
843 output.append(v: patternView.mid(pos: input, n: 2));
844 input += 2;
845 }
846 stillMultiLine = true;
847 break;
848
849 case L'0':
850 if (input + 4 < inputLen) {
851 // copy "\0???" unmodified
852 output.append(v: patternView.mid(pos: input, n: 5));
853 input += 5;
854 } else {
855 // copy "\0" unmodified
856 output.append(v: patternView.mid(pos: input, n: 2));
857 input += 2;
858 }
859 stillMultiLine = true;
860 break;
861
862 case L's':
863 // replace "\s" with "[ \t]"
864 output.append(s: QLatin1String("[ \\t]"));
865 input += 2;
866 break;
867
868 case L'n':
869 stillMultiLine = true;
870 // FALLTROUGH
871 Q_FALLTHROUGH();
872 default:
873 // copy "\?" unmodified
874 output.append(v: patternView.mid(pos: input, n: 2));
875 input += 2;
876 }
877 break;
878
879 case L'[':
880 // copy "[" unmodified
881 insideClass = true;
882 output.append(c: pattern[input]);
883 ++input;
884 break;
885
886 default:
887 // copy "?" unmodified
888 output.append(c: pattern[input]);
889 ++input;
890 }
891 }
892 }
893 return output;
894}
895
// Remove the debug helpers again so they do not leak past this point.
// #undef is a no-op for a macro that is not defined, so no guard is needed.
#undef FAST_DEBUG_ENABLE
#undef FAST_DEBUG
901

// source code of ktexteditor/src/search/kateregexpsearch.cpp