1/*
2 SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de>
3 SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
// BEGIN includes
#include "kateregexpsearch.h"

#include <ktexteditor/document.h>

#include <limits>
// END includes
13
14// Turn debug messages on/off here
15// #define FAST_DEBUG_ENABLE
16
17#ifdef FAST_DEBUG_ENABLE
18#define FAST_DEBUG(x) qCDebug(LOG_KTE) << x
19#else
20#define FAST_DEBUG(x)
21#endif
22
23class KateRegExpSearch::ReplacementStream
24{
25public:
26 struct counter {
27 counter(int value, int minWidth)
28 : value(value)
29 , minWidth(minWidth)
30 {
31 }
32
33 const int value;
34 const int minWidth;
35 };
36
37 struct cap {
38 cap(int n)
39 : n(n)
40 {
41 }
42
43 const int n;
44 };
45
46 enum CaseConversion {
47 upperCase, ///< \U ... uppercase from now on
48 upperCaseFirst, ///< \u ... uppercase the first letter
49 lowerCase, ///< \L ... lowercase from now on
50 lowerCaseFirst, ///< \l ... lowercase the first letter
51 keepCase ///< \E ... back to original case
52 };
53
54public:
55 ReplacementStream(const QStringList &capturedTexts);
56
57 QString str() const
58 {
59 return m_str;
60 }
61
62 ReplacementStream &operator<<(const QString &);
63 ReplacementStream &operator<<(const counter &);
64 ReplacementStream &operator<<(const cap &);
65 ReplacementStream &operator<<(CaseConversion);
66
67private:
68 const QStringList m_capturedTexts;
69 CaseConversion m_caseConversion;
70 QString m_str;
71};
72
73KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts)
74 : m_capturedTexts(capturedTexts)
75 , m_caseConversion(keepCase)
76{
77}
78
79KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str)
80{
81 switch (m_caseConversion) {
82 case upperCase:
83 // Copy as uppercase
84 m_str.append(s: str.toUpper());
85 break;
86
87 case upperCaseFirst:
88 if (str.length() > 0) {
89 m_str.append(c: str.at(i: 0).toUpper());
90 m_str.append(v: QStringView(str).mid(pos: 1));
91 m_caseConversion = keepCase;
92 }
93 break;
94
95 case lowerCase:
96 // Copy as lowercase
97 m_str.append(s: str.toLower());
98 break;
99
100 case lowerCaseFirst:
101 if (str.length() > 0) {
102 m_str.append(c: str.at(i: 0).toLower());
103 m_str.append(v: QStringView(str).mid(pos: 1));
104 m_caseConversion = keepCase;
105 }
106 break;
107
108 case keepCase: // FALLTHROUGH
109 default:
110 // Copy unmodified
111 m_str.append(s: str);
112 break;
113 }
114
115 return *this;
116}
117
118KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c)
119{
120 // Zero padded counter value
121 m_str.append(QStringLiteral("%1").arg(a: c.value, fieldWidth: c.minWidth, base: 10, fillChar: QLatin1Char('0')));
122
123 return *this;
124}
125
126KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap)
127{
128 if (0 <= cap.n && cap.n < m_capturedTexts.size()) {
129 (*this) << m_capturedTexts[cap.n];
130 } else {
131 // Insert just the number to be consistent with QRegExp ("\c" becomes "c")
132 m_str.append(s: QString::number(cap.n));
133 }
134
135 return *this;
136}
137
138KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion)
139{
140 m_caseConversion = caseConversion;
141
142 return *this;
143}
144
145// BEGIN d'tor, c'tor
146//
// KateRegExpSearch constructor
148//
149KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document)
150 : m_document(document)
151{
152}
153
154// helper structs for captures re-construction
// One text position seen in two ways: as a flat character index and as a
// (line, column) pair. Members default to -1 so that a cursor whose
// line/column has not been filled in yet never carries indeterminate values.
struct TwoViewCursor {
    int index = -1; // flat character offset
    int line = -1; // line number, -1 while unresolved
    int col = -1; // column within that line, -1 while unresolved
};
160
// Start/end character offsets of one capture group; -1/-1 (the default)
// means the group did not take part in the match.
struct IndexPair {
    int openIndex = -1;
    int closeIndex = -1;
};
165
166QList<KTextEditor::Range>
167KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options)
168{
169 // Save regexes to avoid reconstructing regexes all the time
170 static QRegularExpression preRegex;
171 static QRegularExpression repairedRegex;
172
173 // Returned if no matches are found
174 QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid());
175
176 // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the
177 // this method returning here if 'pattern' is empty.
178 if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) {
179 return noResult;
180 }
181
182 // Always enable Unicode support
183 options |= QRegularExpression::UseUnicodePropertiesOption;
184
185 if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) {
186 preRegex = QRegularExpression(pattern, options);
187 }
188
189 // If repairPattern() is called on an invalid regex pattern it may cause asserts
190 // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern
191 // expects at least one character after a '\')
192 if (!preRegex.isValid()) {
193 return noResult;
194 }
195
196 // detect pattern type (single- or mutli-line)
197 bool stillMultiLine;
198 const QString repairedPattern = repairPattern(pattern, stillMultiLine);
199
200 // Enable multiline mode, so that the ^ and $ metacharacters in the pattern
201 // are allowed to match, respectively, immediately after and immediately
202 // before any newline in the subject string, as well as at the very beginning
203 // and at the very end of the subject string (see QRegularExpression docs).
204 //
205 // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange
206 // ends in the middle of a line, then a '$' won't match at that position. And
207 // matches that are out of the inputRange are rejected.
208 if (stillMultiLine) {
209 options |= QRegularExpression::MultilineOption;
210 }
211
212 // check if anything changed at all
213 if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) {
214 repairedRegex.setPattern(repairedPattern);
215 repairedRegex.setPatternOptions(options);
216 }
217 if (!repairedRegex.isValid()) {
218 return noResult;
219 }
220
221 const int rangeStartLine = inputRange.start().line();
222 const int rangeStartCol = inputRange.start().column();
223
224 const int rangeEndLine = inputRange.end().line();
225 const int rangeEndCol = inputRange.end().column();
226
227 if (stillMultiLine) {
228 const int rangeLineCount = rangeEndLine - rangeStartLine + 1;
229 FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")");
230
231 const int docLineCount = m_document->lines();
232 // nothing to do...
233 if (rangeStartLine >= docLineCount) {
234 return noResult;
235 }
236
237 QList<int> lineLens(rangeLineCount);
238 int maxMatchOffset = 0;
239
240 // all lines in the input range
241 QString wholeRange;
242 for (int i = 0; i < rangeLineCount; ++i) {
243 const int docLineIndex = rangeStartLine + i;
244 if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index
245 return noResult;
246 }
247
248 const QString textLine = m_document->line(line: docLineIndex);
249 lineLens[i] = textLine.length();
250 wholeRange.append(s: textLine);
251
252 // This check is needed as some parts in vimode rely on this behaviour.
253 // We add an '\n' as a delimiter between lines in the range; but never after the
254 // last line as that would add an '\n' that isn't there in the original text,
255 // and can skew search results or hit an assert when accessing lineLens later
256 // in the code.
257 if (i != (rangeLineCount - 1)) {
258 wholeRange.append(c: QLatin1Char('\n'));
259 }
260
261 // lineLens.at(i) + 1, because '\n' was added
262 maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1;
263
264 FAST_DEBUG(" line" << i << "has length" << lineLens.at(i));
265 }
266
267 FAST_DEBUG("Max. match offset" << maxMatchOffset);
268
269 QRegularExpressionMatch match;
270 bool found = false;
271 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: wholeRange, offset: rangeStartCol);
272
273 if (backwards) {
274 while (iter.hasNext()) {
275 QRegularExpressionMatch curMatch = iter.next();
276 if (curMatch.capturedEnd() <= maxMatchOffset) {
277 match.swap(other&: curMatch);
278 found = true;
279 }
280 }
281 } else { /* forwards */
282 QRegularExpressionMatch curMatch;
283 if (iter.hasNext()) {
284 curMatch = iter.next();
285 }
286 if (curMatch.capturedEnd() <= maxMatchOffset) {
287 match.swap(other&: curMatch);
288 found = true;
289 }
290 }
291
292 if (!found) {
293 // no match
294 FAST_DEBUG("not found");
295 return noResult;
296 }
297
298 // Capture groups: save opening and closing indices and build a map,
299 // the correct values will be written into it later
300 QMap<int, TwoViewCursor *> indicesToCursors;
301 const int numCaptures = repairedRegex.captureCount();
302 QList<IndexPair> indexPairs(numCaptures + 1);
303 for (int c = 0; c <= numCaptures; ++c) {
304 const int openIndex = match.capturedStart(nth: c);
305 IndexPair &pair = indexPairs[c];
306 if (openIndex == -1) {
307 // An invalid index indicates an empty capture group
308 pair.openIndex = -1;
309 pair.closeIndex = -1;
310 FAST_DEBUG("capture []");
311 } else {
312 const int closeIndex = match.capturedEnd(nth: c);
313 pair.openIndex = openIndex;
314 pair.closeIndex = closeIndex;
315 FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]");
316
317 // each key no more than once
318 if (!indicesToCursors.contains(key: openIndex)) {
319 TwoViewCursor *twoViewCursor = new TwoViewCursor;
320 twoViewCursor->index = openIndex;
321 indicesToCursors.insert(key: openIndex, value: twoViewCursor);
322 FAST_DEBUG(" capture group start index added: " << openIndex);
323 }
324 if (!indicesToCursors.contains(key: closeIndex)) {
325 TwoViewCursor *twoViewCursor = new TwoViewCursor;
326 twoViewCursor->index = closeIndex;
327 indicesToCursors.insert(key: closeIndex, value: twoViewCursor);
328 FAST_DEBUG(" capture group end index added: " << closeIndex);
329 }
330 }
331 }
332
333 // find out where they belong
334 int curRelLine = 0;
335 int curRelCol = 0;
336 int curRelIndex = 0;
337
338 for (TwoViewCursor *twoViewCursor : std::as_const(t&: indicesToCursors)) {
339 // forward to index, save line/col
340 const int index = twoViewCursor->index;
341 FAST_DEBUG("resolving position" << index);
342
343 while (curRelIndex <= index) {
344 FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex);
345
346 const int curRelLineLen = lineLens.at(i: curRelLine);
347 const int curLineRemainder = curRelLineLen - curRelCol;
348 const int lineFeedIndex = curRelIndex + curLineRemainder;
349 if (index <= lineFeedIndex) {
350 if (index == lineFeedIndex) {
351 // on this line _at_ line feed
352 FAST_DEBUG(" on line feed");
353 const int absLine = curRelLine + rangeStartLine;
354 twoViewCursor->line = absLine;
355 twoViewCursor->col = curRelLineLen;
356
357 // advance to next line
358 const int advance = (index - curRelIndex) + 1;
359 ++curRelLine;
360 curRelCol = 0;
361 curRelIndex += advance;
362 } else { // index < lineFeedIndex
363 // on this line _before_ line feed
364 FAST_DEBUG(" before line feed");
365 const int diff = (index - curRelIndex);
366 const int absLine = curRelLine + rangeStartLine;
367 const int absCol = curRelCol + diff;
368 twoViewCursor->line = absLine;
369 twoViewCursor->col = absCol;
370
371 // advance on same line
372 const int advance = diff + 1;
373 curRelCol += advance;
374 curRelIndex += advance;
375 }
376 FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")");
377 } else { // if (index > lineFeedIndex)
378 // not on this line
379 // advance to next line
380 FAST_DEBUG(" not on this line");
381 ++curRelLine;
382 curRelCol = 0;
383 const int advance = curLineRemainder + 1;
384 curRelIndex += advance;
385 }
386 }
387 }
388
389 // build result array
390 QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid());
391 for (int y = 0; y <= numCaptures; y++) {
392 IndexPair &pair = indexPairs[y];
393 if (!(pair.openIndex == -1 || pair.closeIndex == -1)) {
394 const TwoViewCursor *const openCursors = indicesToCursors.value(key: pair.openIndex);
395 const TwoViewCursor *const closeCursors = indicesToCursors.value(key: pair.closeIndex);
396 const int startLine = openCursors->line;
397 const int startCol = openCursors->col;
398 const int endLine = closeCursors->line;
399 const int endCol = closeCursors->col;
400 FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")");
401 result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol);
402 }
403 }
404
405 // free structs allocated for indicesToCursors
406 qDeleteAll(c: indicesToCursors);
407
408 return result;
409 } else {
410 // single-line regex search (forwards and backwards)
411 const int rangeStartCol = inputRange.start().column();
412 const uint rangeEndCol = inputRange.end().column();
413
414 const int rangeStartLine = inputRange.start().line();
415 const int rangeEndLine = inputRange.end().line();
416
417 const int forInit = backwards ? rangeEndLine : rangeStartLine;
418
419 const int forInc = backwards ? -1 : +1;
420
421 FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine));
422
423 for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) {
424 if (j < 0 || m_document->lines() <= j) {
425 FAST_DEBUG("searchText | line " << j << ": no");
426 return noResult;
427 }
428
429 const QString textLine = m_document->line(line: j);
430
431 const int offset = (j == rangeStartLine) ? rangeStartCol : 0;
432 const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length();
433
434 bool found = false;
435
436 QRegularExpressionMatch match;
437
438 if (backwards) {
439 QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: textLine, offset);
440 while (iter.hasNext()) {
441 QRegularExpressionMatch curMatch = iter.next();
442 if (curMatch.capturedEnd() <= endLineMaxOffset) {
443 match.swap(other&: curMatch);
444 found = true;
445 }
446 }
447 } else {
448 match = repairedRegex.match(subject: textLine, offset);
449 if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) {
450 found = true;
451 }
452 }
453
454 if (found) {
455 FAST_DEBUG("line " << j << ": yes");
456
457 // build result array
458 const int numCaptures = repairedRegex.captureCount();
459 QList<KTextEditor::Range> result(numCaptures + 1);
460 result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd());
461
462 FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart << ")..(" << j << ", " << match.capturedEnd() << ")");
463
464 for (int y = 1; y <= numCaptures; ++y) {
465 const int openIndex = match.capturedStart(nth: y);
466
467 if (openIndex == -1) {
468 result[y] = KTextEditor::Range::invalid();
469
470 FAST_DEBUG("capture []");
471 } else {
472 const int closeIndex = match.capturedEnd(nth: y);
473
474 FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")");
475
476 result[y] = KTextEditor::Range(j, openIndex, j, closeIndex);
477 }
478 }
479 return result;
480 } else {
481 FAST_DEBUG("searchText | line " << j << ": no");
482 }
483 }
484 }
485 return noResult;
486}
487
488/*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text)
489{
490 return buildReplacement(text, capturedTexts: QStringList(), replacementCounter: 0, replacementGoodies: false);
491}
492
493/*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter)
494{
495 return buildReplacement(text, capturedTexts, replacementCounter, replacementGoodies: true);
496}
497
498/*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies)
499{
500 // get input
501 const int inputLen = text.length();
502 int input = 0; // walker index
503
504 // prepare output
505 ReplacementStream out(capturedTexts);
506
507 while (input < inputLen) {
508 switch (text[input].unicode()) {
509 case L'\n':
510 out << text[input];
511 input++;
512 break;
513
514 case L'\\':
515 if (input + 1 >= inputLen) {
516 // copy backslash
517 out << text[input];
518 input++;
519 break;
520 }
521
522 switch (text[input + 1].unicode()) {
523 case L'0': // "\0000".."\0377"
524 if (input + 4 >= inputLen) {
525 out << ReplacementStream::cap(0);
526 input += 2;
527 } else {
528 bool stripAndSkip = false;
529 const ushort text_2 = text[input + 2].unicode();
530 if ((text_2 >= L'0') && (text_2 <= L'3')) {
531 const ushort text_3 = text[input + 3].unicode();
532 if ((text_3 >= L'0') && (text_3 <= L'7')) {
533 const ushort text_4 = text[input + 4].unicode();
534 if ((text_4 >= L'0') && (text_4 <= L'7')) {
535 int digits[3];
536 for (int i = 0; i < 3; i++) {
537 digits[i] = 7 - (L'7' - text[input + 2 + i].unicode());
538 }
539 const int ch = 64 * digits[0] + 8 * digits[1] + digits[2];
540 out << QChar(ch);
541 input += 5;
542 } else {
543 stripAndSkip = true;
544 }
545 } else {
546 stripAndSkip = true;
547 }
548 } else {
549 stripAndSkip = true;
550 }
551
552 if (stripAndSkip) {
553 out << ReplacementStream::cap(0);
554 input += 2;
555 }
556 }
557 break;
558
559 // single letter captures \x
560 case L'1':
561 case L'2':
562 case L'3':
563 case L'4':
564 case L'5':
565 case L'6':
566 case L'7':
567 case L'8':
568 case L'9':
569 out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode()));
570 input += 2;
571 break;
572
573 // multi letter captures \{xxxx}
574 case L'{': {
575 // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124
576 int capture = 0;
577 int captureSize = 2;
578 while ((input + captureSize) < inputLen) {
579 const ushort nextDigit = text[input + captureSize].unicode();
580 if ((nextDigit >= L'0') && (nextDigit <= L'9')) {
581 capture = (10 * capture) + (9 - (L'9' - nextDigit));
582 ++captureSize;
583 continue;
584 }
585 if (nextDigit == L'}') {
586 ++captureSize;
587 break;
588 }
589 break;
590 }
591 out << ReplacementStream::cap(capture);
592 input += captureSize;
593 break;
594 }
595
596 case L'E': // FALLTHROUGH
597 case L'L': // FALLTHROUGH
598 case L'l': // FALLTHROUGH
599 case L'U': // FALLTHROUGH
600 case L'u':
601 if (!replacementGoodies) {
602 // strip backslash ("\?" -> "?")
603 out << text[input + 1];
604 } else {
605 // handle case switcher
606 switch (text[input + 1].unicode()) {
607 case L'L':
608 out << ReplacementStream::lowerCase;
609 break;
610
611 case L'l':
612 out << ReplacementStream::lowerCaseFirst;
613 break;
614
615 case L'U':
616 out << ReplacementStream::upperCase;
617 break;
618
619 case L'u':
620 out << ReplacementStream::upperCaseFirst;
621 break;
622
623 case L'E': // FALLTHROUGH
624 default:
625 out << ReplacementStream::keepCase;
626 }
627 }
628 input += 2;
629 break;
630
631 case L'#':
632 if (!replacementGoodies) {
633 // strip backslash ("\?" -> "?")
634 out << text[input + 1];
635 input += 2;
636 } else {
637 // handle replacement counter
638 // eat and count all following hash marks
639 // each hash stands for a leading zero: \### will produces 001, 002, ...
640 int minWidth = 1;
641 while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) {
642 minWidth++;
643 }
644 out << ReplacementStream::counter(replacementCounter, minWidth);
645 input += 1 + minWidth;
646 }
647 break;
648
649 case L'a':
650 out << QChar(0x07);
651 input += 2;
652 break;
653
654 case L'f':
655 out << QChar(0x0c);
656 input += 2;
657 break;
658
659 case L'n':
660 out << QChar(0x0a);
661 input += 2;
662 break;
663
664 case L'r':
665 out << QChar(0x0d);
666 input += 2;
667 break;
668
669 case L't':
670 out << QChar(0x09);
671 input += 2;
672 break;
673
674 case L'v':
675 out << QChar(0x0b);
676 input += 2;
677 break;
678
679 case L'x': // "\x0000".."\xffff"
680 if (input + 5 >= inputLen) {
681 // strip backslash ("\x" -> "x")
682 out << text[input + 1];
683 input += 2;
684 } else {
685 bool stripAndSkip = false;
686 const ushort text_2 = text[input + 2].unicode();
687 if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) {
688 const ushort text_3 = text[input + 3].unicode();
689 if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) {
690 const ushort text_4 = text[input + 4].unicode();
691 if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) {
692 const ushort text_5 = text[input + 5].unicode();
693 if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f'))
694 || ((text_5 >= L'A') && (text_5 <= L'F'))) {
695 int digits[4];
696 for (int i = 0; i < 4; i++) {
697 const ushort cur = text[input + 2 + i].unicode();
698 if ((cur >= L'0') && (cur <= L'9')) {
699 digits[i] = 9 - (L'9' - cur);
700 } else if ((cur >= L'a') && (cur <= L'f')) {
701 digits[i] = 15 - (L'f' - cur);
702 } else { // if ((cur >= L'A') && (cur <= L'F')))
703 digits[i] = 15 - (L'F' - cur);
704 }
705 }
706
707 const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3];
708 out << QChar(ch);
709 input += 6;
710 } else {
711 stripAndSkip = true;
712 }
713 } else {
714 stripAndSkip = true;
715 }
716 } else {
717 stripAndSkip = true;
718 }
719 }
720
721 if (stripAndSkip) {
722 // strip backslash ("\x" -> "x")
723 out << text[input + 1];
724 input += 2;
725 }
726 }
727 break;
728
729 default:
730 // strip backslash ("\?" -> "?")
731 out << text[input + 1];
732 input += 2;
733 }
734 break;
735
736 default:
737 out << text[input];
738 input++;
739 }
740 }
741
742 return out.str();
743}
744
745QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine)
746{
747 // '\s' can make a pattern multi-line, it's replaced here with '[ \t]';
748 // besides \s, the following characters can make a pattern multi-line:
749 // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0???
750 // a multi-line pattern must not pass as single-line, the other
751 // way around will just result in slower searches and is therefore
752 // not as critical
753
754 const int inputLen = pattern.length();
755 const QStringView patternView{pattern};
756
757 // prepare output
758 QString output;
759 output.reserve(asize: 2 * inputLen + 1); // twice should be enough for the average case
760
761 // parser state
762 bool insideClass = false;
763
764 stillMultiLine = false;
765 int input = 0;
766 while (input < inputLen) {
767 if (insideClass) {
768 // wait for closing, unescaped ']'
769 switch (pattern[input].unicode()) {
770 case L'\\':
771 switch (pattern[input + 1].unicode()) {
772 case L'x':
773 if (input + 5 < inputLen) {
774 // copy "\x????" unmodified
775 output.append(v: patternView.mid(pos: input, n: 6));
776 input += 6;
777 } else {
778 // copy "\x" unmodified
779 output.append(v: patternView.mid(pos: input, n: 2));
780 input += 2;
781 }
782 stillMultiLine = true;
783 break;
784
785 case L'0':
786 if (input + 4 < inputLen) {
787 // copy "\0???" unmodified
788 output.append(v: patternView.mid(pos: input, n: 5));
789 input += 5;
790 } else {
791 // copy "\0" unmodified
792 output.append(v: patternView.mid(pos: input, n: 2));
793 input += 2;
794 }
795 stillMultiLine = true;
796 break;
797
798 case L's':
799 // replace "\s" with "[ \t]"
800 output.append(s: QLatin1String(" \\t"));
801 input += 2;
802 break;
803
804 case L'n':
805 stillMultiLine = true;
806 // FALLTROUGH
807 Q_FALLTHROUGH();
808
809 default:
810 // copy "\?" unmodified
811 output.append(v: patternView.mid(pos: input, n: 2));
812 input += 2;
813 }
814 break;
815
816 case L']':
817 // copy "]" unmodified
818 insideClass = false;
819 output.append(c: pattern[input]);
820 ++input;
821 break;
822
823 default:
824 // copy "?" unmodified
825 output.append(c: pattern[input]);
826 ++input;
827 }
828 } else {
829 switch (pattern[input].unicode()) {
830 case L'\\':
831 switch (pattern[input + 1].unicode()) {
832 case L'x':
833 if (input + 5 < inputLen) {
834 // copy "\x????" unmodified
835 output.append(v: patternView.mid(pos: input, n: 6));
836 input += 6;
837 } else {
838 // copy "\x" unmodified
839 output.append(v: patternView.mid(pos: input, n: 2));
840 input += 2;
841 }
842 stillMultiLine = true;
843 break;
844
845 case L'0':
846 if (input + 4 < inputLen) {
847 // copy "\0???" unmodified
848 output.append(v: patternView.mid(pos: input, n: 5));
849 input += 5;
850 } else {
851 // copy "\0" unmodified
852 output.append(v: patternView.mid(pos: input, n: 2));
853 input += 2;
854 }
855 stillMultiLine = true;
856 break;
857
858 case L's':
859 // replace "\s" with "[ \t]"
860 output.append(s: QLatin1String("[ \\t]"));
861 input += 2;
862 break;
863
864 case L'n':
865 stillMultiLine = true;
866 // FALLTROUGH
867 Q_FALLTHROUGH();
868 default:
869 // copy "\?" unmodified
870 output.append(v: patternView.mid(pos: input, n: 2));
871 input += 2;
872 }
873 break;
874
875 case L'[':
876 // copy "[" unmodified
877 insideClass = true;
878 output.append(c: pattern[input]);
879 ++input;
880 break;
881
882 default:
883 // copy "?" unmodified
884 output.append(c: pattern[input]);
885 ++input;
886 }
887 }
888 }
889 return output;
890}
891
892// Kill our helpers again
893#ifdef FAST_DEBUG_ENABLE
894#undef FAST_DEBUG_ENABLE
895#endif
896#undef FAST_DEBUG
897

source code of ktexteditor/src/search/kateregexpsearch.cpp