1 | /* |
2 | SPDX-FileCopyrightText: 2010 Bernhard Beschow <bbeschow@cs.tu-berlin.de> |
3 | SPDX-FileCopyrightText: 2007 Sebastian Pipping <webmaster@hartwork.org> |
4 | |
5 | SPDX-License-Identifier: LGPL-2.0-or-later |
6 | */ |
7 | |
8 | // BEGIN includes |
9 | #include "kateregexpsearch.h" |
10 | |
11 | #include "katepartdebug.h" // for LOG_KTE |
12 | |
13 | #include <ktexteditor/document.h> |
14 | // END includes |
15 | |
16 | // Turn debug messages on/off here |
17 | // #define FAST_DEBUG_ENABLE |
18 | |
19 | #ifdef FAST_DEBUG_ENABLE |
20 | #define FAST_DEBUG(x) qCDebug(LOG_KTE) << x |
21 | #else |
22 | #define FAST_DEBUG(x) |
23 | #endif |
24 | |
25 | class KateRegExpSearch::ReplacementStream |
26 | { |
27 | public: |
28 | struct counter { |
29 | counter(int value, int minWidth) |
30 | : value(value) |
31 | , minWidth(minWidth) |
32 | { |
33 | } |
34 | |
35 | const int value; |
36 | const int minWidth; |
37 | }; |
38 | |
39 | struct cap { |
40 | cap(int n) |
41 | : n(n) |
42 | { |
43 | } |
44 | |
45 | const int n; |
46 | }; |
47 | |
48 | enum CaseConversion { |
49 | upperCase, ///< \U ... uppercase from now on |
50 | upperCaseFirst, ///< \u ... uppercase the first letter |
51 | lowerCase, ///< \L ... lowercase from now on |
52 | lowerCaseFirst, ///< \l ... lowercase the first letter |
53 | keepCase ///< \E ... back to original case |
54 | }; |
55 | |
56 | public: |
57 | ReplacementStream(const QStringList &capturedTexts); |
58 | |
59 | QString str() const |
60 | { |
61 | return m_str; |
62 | } |
63 | |
64 | ReplacementStream &operator<<(const QString &); |
65 | ReplacementStream &operator<<(const counter &); |
66 | ReplacementStream &operator<<(const cap &); |
67 | ReplacementStream &operator<<(CaseConversion); |
68 | |
69 | private: |
70 | const QStringList m_capturedTexts; |
71 | CaseConversion m_caseConversion; |
72 | QString m_str; |
73 | }; |
74 | |
// Starts with an empty output string and no case conversion (keepCase);
// capturedTexts is copied so that later cap references can be resolved.
KateRegExpSearch::ReplacementStream::ReplacementStream(const QStringList &capturedTexts)
    : m_capturedTexts(capturedTexts)
    , m_caseConversion(keepCase)
{
}
80 | |
81 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const QString &str) |
82 | { |
83 | switch (m_caseConversion) { |
84 | case upperCase: |
85 | // Copy as uppercase |
86 | m_str.append(s: str.toUpper()); |
87 | break; |
88 | |
89 | case upperCaseFirst: |
90 | if (str.length() > 0) { |
91 | m_str.append(c: str.at(i: 0).toUpper()); |
92 | m_str.append(v: QStringView(str).mid(pos: 1)); |
93 | m_caseConversion = keepCase; |
94 | } |
95 | break; |
96 | |
97 | case lowerCase: |
98 | // Copy as lowercase |
99 | m_str.append(s: str.toLower()); |
100 | break; |
101 | |
102 | case lowerCaseFirst: |
103 | if (str.length() > 0) { |
104 | m_str.append(c: str.at(i: 0).toLower()); |
105 | m_str.append(v: QStringView(str).mid(pos: 1)); |
106 | m_caseConversion = keepCase; |
107 | } |
108 | break; |
109 | |
110 | case keepCase: // FALLTHROUGH |
111 | default: |
112 | // Copy unmodified |
113 | m_str.append(s: str); |
114 | break; |
115 | } |
116 | |
117 | return *this; |
118 | } |
119 | |
120 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const counter &c) |
121 | { |
122 | // Zero padded counter value |
123 | m_str.append(QStringLiteral("%1" ).arg(a: c.value, fieldWidth: c.minWidth, base: 10, fillChar: QLatin1Char('0'))); |
124 | |
125 | return *this; |
126 | } |
127 | |
128 | KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(const cap &cap) |
129 | { |
130 | if (0 <= cap.n && cap.n < m_capturedTexts.size()) { |
131 | (*this) << m_capturedTexts[cap.n]; |
132 | } else { |
133 | // Insert just the number to be consistent with QRegExp ("\c" becomes "c") |
134 | m_str.append(s: QString::number(cap.n)); |
135 | } |
136 | |
137 | return *this; |
138 | } |
139 | |
// Switches the case conversion that is applied to strings streamed from now on;
// nothing is appended to the output.
KateRegExpSearch::ReplacementStream &KateRegExpSearch::ReplacementStream::operator<<(CaseConversion caseConversion)
{
    m_caseConversion = caseConversion;

    return *this;
}
146 | |
147 | // BEGIN d'tor, c'tor |
//
// KateRegExpSearch constructor: only stores the document pointer; the document
// is read (never modified) by search().
//
KateRegExpSearch::KateRegExpSearch(const KTextEditor::Document *document)
    : m_document(document)
{
}
155 | |
156 | // helper structs for captures re-construction |
// Helper for capture reconstruction: one position seen both as an index into
// the concatenated search text and as a (line, column) pair in the document.
// Members default to -1 so that a cursor is never read with indeterminate values.
struct TwoViewCursor {
    int index = -1;
    int line = -1;
    int col = -1;
};
162 | |
// Start and end index of one capture group inside the concatenated search
// text; -1 marks a group that did not take part in the match.
struct IndexPair {
    int openIndex = -1;
    int closeIndex = -1;
};
167 | |
168 | QList<KTextEditor::Range> |
169 | KateRegExpSearch::search(const QString &pattern, KTextEditor::Range inputRange, bool backwards, QRegularExpression::PatternOptions options) |
170 | { |
171 | // Save regexes to avoid reconstructing regexes all the time |
172 | static QRegularExpression preRegex; |
173 | static QRegularExpression repairedRegex; |
174 | |
175 | // Returned if no matches are found |
176 | QList<KTextEditor::Range> noResult(1, KTextEditor::Range::invalid()); |
177 | |
178 | // Note that some methods in vimode (e.g. Searcher::findPatternWorker) rely on the |
179 | // this method returning here if 'pattern' is empty. |
180 | if (pattern.isEmpty() || inputRange.isEmpty() || !inputRange.isValid()) { |
181 | return noResult; |
182 | } |
183 | |
184 | // Always enable Unicode support |
185 | options |= QRegularExpression::UseUnicodePropertiesOption; |
186 | |
187 | if (preRegex.pattern() != pattern || preRegex.patternOptions() != options) { |
188 | preRegex = QRegularExpression(pattern, options); |
189 | } |
190 | |
191 | // If repairPattern() is called on an invalid regex pattern it may cause asserts |
192 | // in QString (e.g. if the pattern is just '\\', pattern.size() is 1, and repaierPattern |
193 | // expects at least one character after a '\') |
194 | if (!preRegex.isValid()) { |
195 | return noResult; |
196 | } |
197 | |
198 | // detect pattern type (single- or mutli-line) |
199 | bool stillMultiLine; |
200 | const QString repairedPattern = repairPattern(pattern, stillMultiLine); |
201 | |
202 | // Enable multiline mode, so that the ^ and $ metacharacters in the pattern |
203 | // are allowed to match, respectively, immediately after and immediately |
204 | // before any newline in the subject string, as well as at the very beginning |
205 | // and at the very end of the subject string (see QRegularExpression docs). |
206 | // |
207 | // Whole lines are passed to QRegularExpression, so that e.g. if the inputRange |
208 | // ends in the middle of a line, then a '$' won't match at that position. And |
209 | // matches that are out of the inputRange are rejected. |
210 | if (stillMultiLine) { |
211 | options |= QRegularExpression::MultilineOption; |
212 | } |
213 | |
214 | // check if anything changed at all |
215 | if (repairedRegex.pattern() != repairedPattern || repairedRegex.patternOptions() != options) { |
216 | repairedRegex.setPattern(repairedPattern); |
217 | repairedRegex.setPatternOptions(options); |
218 | } |
219 | if (!repairedRegex.isValid()) { |
220 | return noResult; |
221 | } |
222 | |
223 | const int rangeStartLine = inputRange.start().line(); |
224 | const int rangeStartCol = inputRange.start().column(); |
225 | |
226 | const int rangeEndLine = inputRange.end().line(); |
227 | const int rangeEndCol = inputRange.end().column(); |
228 | |
229 | if (stillMultiLine) { |
230 | const int rangeLineCount = rangeEndLine - rangeStartLine + 1; |
231 | FAST_DEBUG("regular expression search (lines " << rangeStartLine << ".." << rangeEndLine << ")" ); |
232 | |
233 | const int docLineCount = m_document->lines(); |
234 | // nothing to do... |
235 | if (rangeStartLine >= docLineCount) { |
236 | return noResult; |
237 | } |
238 | |
239 | QList<int> lineLens(rangeLineCount); |
240 | int maxMatchOffset = 0; |
241 | |
242 | // all lines in the input range |
243 | QString wholeRange; |
244 | for (int i = 0; i < rangeLineCount; ++i) { |
245 | const int docLineIndex = rangeStartLine + i; |
246 | if (docLineIndex < 0 || docLineCount <= docLineIndex) { // invalid index |
247 | return noResult; |
248 | } |
249 | |
250 | const QString textLine = m_document->line(line: docLineIndex); |
251 | lineLens[i] = textLine.length(); |
252 | wholeRange.append(s: textLine); |
253 | |
254 | // This check is needed as some parts in vimode rely on this behaviour. |
255 | // We add an '\n' as a delimiter between lines in the range; but never after the |
256 | // last line as that would add an '\n' that isn't there in the original text, |
257 | // and can skew search results or hit an assert when accessing lineLens later |
258 | // in the code. |
259 | if (i != (rangeLineCount - 1)) { |
260 | wholeRange.append(c: QLatin1Char('\n')); |
261 | } |
262 | |
263 | // lineLens.at(i) + 1, because '\n' was added |
264 | maxMatchOffset += (i == rangeEndLine) ? rangeEndCol : lineLens.at(i) + 1; |
265 | |
266 | FAST_DEBUG(" line" << i << "has length" << lineLens.at(i)); |
267 | } |
268 | |
269 | FAST_DEBUG("Max. match offset" << maxMatchOffset); |
270 | |
271 | QRegularExpressionMatch match; |
272 | bool found = false; |
273 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatch(subject: wholeRange, offset: rangeStartCol); |
274 | |
275 | if (backwards) { |
276 | while (iter.hasNext()) { |
277 | QRegularExpressionMatch curMatch = iter.next(); |
278 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
279 | match.swap(other&: curMatch); |
280 | found = true; |
281 | } |
282 | } |
283 | } else { /* forwards */ |
284 | QRegularExpressionMatch curMatch; |
285 | if (iter.hasNext()) { |
286 | curMatch = iter.next(); |
287 | } |
288 | if (curMatch.capturedEnd() <= maxMatchOffset) { |
289 | match.swap(other&: curMatch); |
290 | found = true; |
291 | } |
292 | } |
293 | |
294 | if (!found) { |
295 | // no match |
296 | FAST_DEBUG("not found" ); |
297 | return noResult; |
298 | } |
299 | |
300 | // Capture groups: save opening and closing indices and build a map, |
301 | // the correct values will be written into it later |
302 | QMap<int, TwoViewCursor *> indicesToCursors; |
303 | const int numCaptures = repairedRegex.captureCount(); |
304 | QList<IndexPair> indexPairs(numCaptures + 1); |
305 | for (int c = 0; c <= numCaptures; ++c) { |
306 | const int openIndex = match.capturedStart(nth: c); |
307 | IndexPair &pair = indexPairs[c]; |
308 | if (openIndex == -1) { |
309 | // An invalid index indicates an empty capture group |
310 | pair.openIndex = -1; |
311 | pair.closeIndex = -1; |
312 | FAST_DEBUG("capture []" ); |
313 | } else { |
314 | const int closeIndex = match.capturedEnd(nth: c); |
315 | pair.openIndex = openIndex; |
316 | pair.closeIndex = closeIndex; |
317 | FAST_DEBUG("capture [" << pair.openIndex << ".." << pair.closeIndex << "]" ); |
318 | |
319 | // each key no more than once |
320 | if (!indicesToCursors.contains(key: openIndex)) { |
321 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
322 | twoViewCursor->index = openIndex; |
323 | indicesToCursors.insert(key: openIndex, value: twoViewCursor); |
324 | FAST_DEBUG(" capture group start index added: " << openIndex); |
325 | } |
326 | if (!indicesToCursors.contains(key: closeIndex)) { |
327 | TwoViewCursor *twoViewCursor = new TwoViewCursor; |
328 | twoViewCursor->index = closeIndex; |
329 | indicesToCursors.insert(key: closeIndex, value: twoViewCursor); |
330 | FAST_DEBUG(" capture group end index added: " << closeIndex); |
331 | } |
332 | } |
333 | } |
334 | |
335 | // find out where they belong |
336 | int curRelLine = 0; |
337 | int curRelCol = 0; |
338 | int curRelIndex = 0; |
339 | |
340 | for (TwoViewCursor *twoViewCursor : std::as_const(t&: indicesToCursors)) { |
341 | // forward to index, save line/col |
342 | const int index = twoViewCursor->index; |
343 | FAST_DEBUG("resolving position" << index); |
344 | |
345 | while (curRelIndex <= index) { |
346 | FAST_DEBUG("walk pos (" << curRelLine << "," << curRelCol << ") = " << curRelIndex << "relative, steps more to go" << index - curRelIndex); |
347 | |
348 | const int curRelLineLen = lineLens.at(i: curRelLine); |
349 | const int curLineRemainder = curRelLineLen - curRelCol; |
350 | const int lineFeedIndex = curRelIndex + curLineRemainder; |
351 | if (index <= lineFeedIndex) { |
352 | if (index == lineFeedIndex) { |
353 | // on this line _at_ line feed |
354 | FAST_DEBUG(" on line feed" ); |
355 | const int absLine = curRelLine + rangeStartLine; |
356 | twoViewCursor->line = absLine; |
357 | twoViewCursor->col = curRelLineLen; |
358 | |
359 | // advance to next line |
360 | const int advance = (index - curRelIndex) + 1; |
361 | ++curRelLine; |
362 | curRelCol = 0; |
363 | curRelIndex += advance; |
364 | } else { // index < lineFeedIndex |
365 | // on this line _before_ line feed |
366 | FAST_DEBUG(" before line feed" ); |
367 | const int diff = (index - curRelIndex); |
368 | const int absLine = curRelLine + rangeStartLine; |
369 | const int absCol = curRelCol + diff; |
370 | twoViewCursor->line = absLine; |
371 | twoViewCursor->col = absCol; |
372 | |
373 | // advance on same line |
374 | const int advance = diff + 1; |
375 | curRelCol += advance; |
376 | curRelIndex += advance; |
377 | } |
378 | FAST_DEBUG("position(" << twoViewCursor->line << "," << twoViewCursor->col << ")" ); |
379 | } else { // if (index > lineFeedIndex) |
380 | // not on this line |
381 | // advance to next line |
382 | FAST_DEBUG(" not on this line" ); |
383 | ++curRelLine; |
384 | curRelCol = 0; |
385 | const int advance = curLineRemainder + 1; |
386 | curRelIndex += advance; |
387 | } |
388 | } |
389 | } |
390 | |
391 | // build result array |
392 | QList<KTextEditor::Range> result(numCaptures + 1, KTextEditor::Range::invalid()); |
393 | for (int y = 0; y <= numCaptures; y++) { |
394 | IndexPair &pair = indexPairs[y]; |
395 | if (!(pair.openIndex == -1 || pair.closeIndex == -1)) { |
396 | const TwoViewCursor *const openCursors = indicesToCursors.value(key: pair.openIndex); |
397 | const TwoViewCursor *const closeCursors = indicesToCursors.value(key: pair.closeIndex); |
398 | const int startLine = openCursors->line; |
399 | const int startCol = openCursors->col; |
400 | const int endLine = closeCursors->line; |
401 | const int endCol = closeCursors->col; |
402 | FAST_DEBUG("range " << y << ": (" << startLine << ", " << startCol << ")..(" << endLine << ", " << endCol << ")" ); |
403 | result[y] = KTextEditor::Range(startLine, startCol, endLine, endCol); |
404 | } |
405 | } |
406 | |
407 | // free structs allocated for indicesToCursors |
408 | qDeleteAll(c: indicesToCursors); |
409 | |
410 | return result; |
411 | } else { |
412 | // single-line regex search (forwards and backwards) |
413 | const int rangeStartCol = inputRange.start().column(); |
414 | const uint rangeEndCol = inputRange.end().column(); |
415 | |
416 | const int rangeStartLine = inputRange.start().line(); |
417 | const int rangeEndLine = inputRange.end().line(); |
418 | |
419 | const int forInit = backwards ? rangeEndLine : rangeStartLine; |
420 | |
421 | const int forInc = backwards ? -1 : +1; |
422 | |
423 | FAST_DEBUG("single line " << (backwards ? rangeEndLine : rangeStartLine) << ".." << (backwards ? rangeStartLine : rangeEndLine)); |
424 | |
425 | for (int j = forInit; (rangeStartLine <= j) && (j <= rangeEndLine); j += forInc) { |
426 | if (j < 0 || m_document->lines() <= j) { |
427 | FAST_DEBUG("searchText | line " << j << ": no" ); |
428 | return noResult; |
429 | } |
430 | |
431 | const QString textLine = m_document->line(line: j); |
432 | |
433 | const int offset = (j == rangeStartLine) ? rangeStartCol : 0; |
434 | const int endLineMaxOffset = (j == rangeEndLine) ? rangeEndCol : textLine.length(); |
435 | |
436 | bool found = false; |
437 | |
438 | QRegularExpressionMatch match; |
439 | |
440 | if (backwards) { |
441 | // we can use globalMatchView as textLine is a const local above |
442 | QRegularExpressionMatchIterator iter = repairedRegex.globalMatchView(subjectView: textLine, offset); |
443 | while (iter.hasNext()) { |
444 | QRegularExpressionMatch curMatch = iter.next(); |
445 | if (curMatch.capturedEnd() <= endLineMaxOffset) { |
446 | match.swap(other&: curMatch); |
447 | found = true; |
448 | } |
449 | } |
450 | } else { |
451 | // we can use matchView as textLine is a const local above |
452 | match = repairedRegex.matchView(subjectView: textLine, offset); |
453 | if (match.hasMatch() && match.capturedEnd() <= endLineMaxOffset) { |
454 | found = true; |
455 | } |
456 | } |
457 | |
458 | if (found) { |
459 | FAST_DEBUG("line " << j << ": yes" ); |
460 | |
461 | // build result array |
462 | const int numCaptures = repairedRegex.captureCount(); |
463 | QList<KTextEditor::Range> result(numCaptures + 1); |
464 | result[0] = KTextEditor::Range(j, match.capturedStart(), j, match.capturedEnd()); |
465 | |
466 | FAST_DEBUG("result range " << 0 << ": (" << j << ", " << match.capturedStart() << ")..(" << j << ", " << match.capturedEnd() << ")" ); |
467 | |
468 | for (int y = 1; y <= numCaptures; ++y) { |
469 | const int openIndex = match.capturedStart(nth: y); |
470 | |
471 | if (openIndex == -1) { |
472 | result[y] = KTextEditor::Range::invalid(); |
473 | |
474 | FAST_DEBUG("capture []" ); |
475 | } else { |
476 | const int closeIndex = match.capturedEnd(nth: y); |
477 | |
478 | FAST_DEBUG("result range " << y << ": (" << j << ", " << openIndex << ")..(" << j << ", " << closeIndex << ")" ); |
479 | |
480 | result[y] = KTextEditor::Range(j, openIndex, j, closeIndex); |
481 | } |
482 | } |
483 | return result; |
484 | } else { |
485 | FAST_DEBUG("searchText | line " << j << ": no" ); |
486 | } |
487 | } |
488 | } |
489 | return noResult; |
490 | } |
491 | |
492 | /*static*/ QString KateRegExpSearch::escapePlaintext(const QString &text) |
493 | { |
494 | return buildReplacement(text, capturedTexts: QStringList(), replacementCounter: 0, replacementGoodies: false); |
495 | } |
496 | |
497 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter) |
498 | { |
499 | return buildReplacement(text, capturedTexts, replacementCounter, replacementGoodies: true); |
500 | } |
501 | |
502 | /*static*/ QString KateRegExpSearch::buildReplacement(const QString &text, const QStringList &capturedTexts, int replacementCounter, bool replacementGoodies) |
503 | { |
504 | // get input |
505 | const int inputLen = text.length(); |
506 | int input = 0; // walker index |
507 | |
508 | // prepare output |
509 | ReplacementStream out(capturedTexts); |
510 | |
511 | while (input < inputLen) { |
512 | switch (text[input].unicode()) { |
513 | case L'\n': |
514 | out << text[input]; |
515 | input++; |
516 | break; |
517 | |
518 | case L'\\': |
519 | if (input + 1 >= inputLen) { |
520 | // copy backslash |
521 | out << text[input]; |
522 | input++; |
523 | break; |
524 | } |
525 | |
526 | switch (text[input + 1].unicode()) { |
527 | case L'0': // "\0000".."\0377" |
528 | if (input + 4 >= inputLen) { |
529 | out << ReplacementStream::cap(0); |
530 | input += 2; |
531 | } else { |
532 | bool stripAndSkip = false; |
533 | const ushort text_2 = text[input + 2].unicode(); |
534 | if ((text_2 >= L'0') && (text_2 <= L'3')) { |
535 | const ushort text_3 = text[input + 3].unicode(); |
536 | if ((text_3 >= L'0') && (text_3 <= L'7')) { |
537 | const ushort text_4 = text[input + 4].unicode(); |
538 | if ((text_4 >= L'0') && (text_4 <= L'7')) { |
539 | int digits[3]; |
540 | for (int i = 0; i < 3; i++) { |
541 | digits[i] = 7 - (L'7' - text[input + 2 + i].unicode()); |
542 | } |
543 | const int ch = 64 * digits[0] + 8 * digits[1] + digits[2]; |
544 | out << QChar(ch); |
545 | input += 5; |
546 | } else { |
547 | stripAndSkip = true; |
548 | } |
549 | } else { |
550 | stripAndSkip = true; |
551 | } |
552 | } else { |
553 | stripAndSkip = true; |
554 | } |
555 | |
556 | if (stripAndSkip) { |
557 | out << ReplacementStream::cap(0); |
558 | input += 2; |
559 | } |
560 | } |
561 | break; |
562 | |
563 | // single letter captures \x |
564 | case L'1': |
565 | case L'2': |
566 | case L'3': |
567 | case L'4': |
568 | case L'5': |
569 | case L'6': |
570 | case L'7': |
571 | case L'8': |
572 | case L'9': |
573 | out << ReplacementStream::cap(9 - (L'9' - text[input + 1].unicode())); |
574 | input += 2; |
575 | break; |
576 | |
577 | // multi letter captures \{xxxx} |
578 | case L'{': { |
579 | // allow {1212124}.... captures, see bug 365124 + testReplaceManyCapturesBug365124 |
580 | int capture = 0; |
581 | int captureSize = 2; |
582 | while ((input + captureSize) < inputLen) { |
583 | const ushort nextDigit = text[input + captureSize].unicode(); |
584 | if ((nextDigit >= L'0') && (nextDigit <= L'9')) { |
585 | capture = (10 * capture) + (9 - (L'9' - nextDigit)); |
586 | ++captureSize; |
587 | continue; |
588 | } |
589 | if (nextDigit == L'}') { |
590 | ++captureSize; |
591 | break; |
592 | } |
593 | break; |
594 | } |
595 | out << ReplacementStream::cap(capture); |
596 | input += captureSize; |
597 | break; |
598 | } |
599 | |
600 | case L'E': // FALLTHROUGH |
601 | case L'L': // FALLTHROUGH |
602 | case L'l': // FALLTHROUGH |
603 | case L'U': // FALLTHROUGH |
604 | case L'u': |
605 | if (!replacementGoodies) { |
606 | // strip backslash ("\?" -> "?") |
607 | out << text[input + 1]; |
608 | } else { |
609 | // handle case switcher |
610 | switch (text[input + 1].unicode()) { |
611 | case L'L': |
612 | out << ReplacementStream::lowerCase; |
613 | break; |
614 | |
615 | case L'l': |
616 | out << ReplacementStream::lowerCaseFirst; |
617 | break; |
618 | |
619 | case L'U': |
620 | out << ReplacementStream::upperCase; |
621 | break; |
622 | |
623 | case L'u': |
624 | out << ReplacementStream::upperCaseFirst; |
625 | break; |
626 | |
627 | case L'E': // FALLTHROUGH |
628 | default: |
629 | out << ReplacementStream::keepCase; |
630 | } |
631 | } |
632 | input += 2; |
633 | break; |
634 | |
635 | case L'#': |
636 | if (!replacementGoodies) { |
637 | // strip backslash ("\?" -> "?") |
638 | out << text[input + 1]; |
639 | input += 2; |
640 | } else { |
641 | // handle replacement counter |
642 | // eat and count all following hash marks |
643 | // each hash stands for a leading zero: \### will produces 001, 002, ... |
644 | int minWidth = 1; |
645 | while ((input + minWidth + 1 < inputLen) && (text[input + minWidth + 1].unicode() == L'#')) { |
646 | minWidth++; |
647 | } |
648 | out << ReplacementStream::counter(replacementCounter, minWidth); |
649 | input += 1 + minWidth; |
650 | } |
651 | break; |
652 | |
653 | case L'a': |
654 | out << QChar(0x07); |
655 | input += 2; |
656 | break; |
657 | |
658 | case L'f': |
659 | out << QChar(0x0c); |
660 | input += 2; |
661 | break; |
662 | |
663 | case L'n': |
664 | out << QChar(0x0a); |
665 | input += 2; |
666 | break; |
667 | |
668 | case L'r': |
669 | out << QChar(0x0d); |
670 | input += 2; |
671 | break; |
672 | |
673 | case L't': |
674 | out << QChar(0x09); |
675 | input += 2; |
676 | break; |
677 | |
678 | case L'v': |
679 | out << QChar(0x0b); |
680 | input += 2; |
681 | break; |
682 | |
683 | case L'x': // "\x0000".."\xffff" |
684 | if (input + 5 >= inputLen) { |
685 | // strip backslash ("\x" -> "x") |
686 | out << text[input + 1]; |
687 | input += 2; |
688 | } else { |
689 | bool stripAndSkip = false; |
690 | const ushort text_2 = text[input + 2].unicode(); |
691 | if (((text_2 >= L'0') && (text_2 <= L'9')) || ((text_2 >= L'a') && (text_2 <= L'f')) || ((text_2 >= L'A') && (text_2 <= L'F'))) { |
692 | const ushort text_3 = text[input + 3].unicode(); |
693 | if (((text_3 >= L'0') && (text_3 <= L'9')) || ((text_3 >= L'a') && (text_3 <= L'f')) || ((text_3 >= L'A') && (text_3 <= L'F'))) { |
694 | const ushort text_4 = text[input + 4].unicode(); |
695 | if (((text_4 >= L'0') && (text_4 <= L'9')) || ((text_4 >= L'a') && (text_4 <= L'f')) || ((text_4 >= L'A') && (text_4 <= L'F'))) { |
696 | const ushort text_5 = text[input + 5].unicode(); |
697 | if (((text_5 >= L'0') && (text_5 <= L'9')) || ((text_5 >= L'a') && (text_5 <= L'f')) |
698 | || ((text_5 >= L'A') && (text_5 <= L'F'))) { |
699 | int digits[4]; |
700 | for (int i = 0; i < 4; i++) { |
701 | const ushort cur = text[input + 2 + i].unicode(); |
702 | if ((cur >= L'0') && (cur <= L'9')) { |
703 | digits[i] = 9 - (L'9' - cur); |
704 | } else if ((cur >= L'a') && (cur <= L'f')) { |
705 | digits[i] = 15 - (L'f' - cur); |
706 | } else { // if ((cur >= L'A') && (cur <= L'F'))) |
707 | digits[i] = 15 - (L'F' - cur); |
708 | } |
709 | } |
710 | |
711 | const int ch = 4096 * digits[0] + 256 * digits[1] + 16 * digits[2] + digits[3]; |
712 | out << QChar(ch); |
713 | input += 6; |
714 | } else { |
715 | stripAndSkip = true; |
716 | } |
717 | } else { |
718 | stripAndSkip = true; |
719 | } |
720 | } else { |
721 | stripAndSkip = true; |
722 | } |
723 | } |
724 | |
725 | if (stripAndSkip) { |
726 | // strip backslash ("\x" -> "x") |
727 | out << text[input + 1]; |
728 | input += 2; |
729 | } |
730 | } |
731 | break; |
732 | |
733 | default: |
734 | // strip backslash ("\?" -> "?") |
735 | out << text[input + 1]; |
736 | input += 2; |
737 | } |
738 | break; |
739 | |
740 | default: |
741 | out << text[input]; |
742 | input++; |
743 | } |
744 | } |
745 | |
746 | return out.str(); |
747 | } |
748 | |
749 | QString KateRegExpSearch::repairPattern(const QString &pattern, bool &stillMultiLine) |
750 | { |
751 | // '\s' can make a pattern multi-line, it's replaced here with '[ \t]'; |
752 | // besides \s, the following characters can make a pattern multi-line: |
753 | // \n, \x000A (Line Feed), \x????-\x????, \0012, \0???-\0??? |
754 | // a multi-line pattern must not pass as single-line, the other |
755 | // way around will just result in slower searches and is therefore |
756 | // not as critical |
757 | |
758 | const int inputLen = pattern.length(); |
759 | const QStringView patternView{pattern}; |
760 | |
761 | // prepare output |
762 | QString output; |
763 | output.reserve(asize: 2 * inputLen + 1); // twice should be enough for the average case |
764 | |
765 | // parser state |
766 | bool insideClass = false; |
767 | |
768 | stillMultiLine = false; |
769 | int input = 0; |
770 | while (input < inputLen) { |
771 | if (insideClass) { |
772 | // wait for closing, unescaped ']' |
773 | switch (pattern[input].unicode()) { |
774 | case L'\\': |
775 | switch (pattern[input + 1].unicode()) { |
776 | case L'x': |
777 | if (input + 5 < inputLen) { |
778 | // copy "\x????" unmodified |
779 | output.append(v: patternView.mid(pos: input, n: 6)); |
780 | input += 6; |
781 | } else { |
782 | // copy "\x" unmodified |
783 | output.append(v: patternView.mid(pos: input, n: 2)); |
784 | input += 2; |
785 | } |
786 | stillMultiLine = true; |
787 | break; |
788 | |
789 | case L'0': |
790 | if (input + 4 < inputLen) { |
791 | // copy "\0???" unmodified |
792 | output.append(v: patternView.mid(pos: input, n: 5)); |
793 | input += 5; |
794 | } else { |
795 | // copy "\0" unmodified |
796 | output.append(v: patternView.mid(pos: input, n: 2)); |
797 | input += 2; |
798 | } |
799 | stillMultiLine = true; |
800 | break; |
801 | |
802 | case L's': |
803 | // replace "\s" with "[ \t]" |
804 | output.append(s: QLatin1String(" \\t" )); |
805 | input += 2; |
806 | break; |
807 | |
808 | case L'n': |
809 | stillMultiLine = true; |
810 | // FALLTROUGH |
811 | Q_FALLTHROUGH(); |
812 | |
813 | default: |
814 | // copy "\?" unmodified |
815 | output.append(v: patternView.mid(pos: input, n: 2)); |
816 | input += 2; |
817 | } |
818 | break; |
819 | |
820 | case L']': |
821 | // copy "]" unmodified |
822 | insideClass = false; |
823 | output.append(c: pattern[input]); |
824 | ++input; |
825 | break; |
826 | |
827 | default: |
828 | // copy "?" unmodified |
829 | output.append(c: pattern[input]); |
830 | ++input; |
831 | } |
832 | } else { |
833 | switch (pattern[input].unicode()) { |
834 | case L'\\': |
835 | switch (pattern[input + 1].unicode()) { |
836 | case L'x': |
837 | if (input + 5 < inputLen) { |
838 | // copy "\x????" unmodified |
839 | output.append(v: patternView.mid(pos: input, n: 6)); |
840 | input += 6; |
841 | } else { |
842 | // copy "\x" unmodified |
843 | output.append(v: patternView.mid(pos: input, n: 2)); |
844 | input += 2; |
845 | } |
846 | stillMultiLine = true; |
847 | break; |
848 | |
849 | case L'0': |
850 | if (input + 4 < inputLen) { |
851 | // copy "\0???" unmodified |
852 | output.append(v: patternView.mid(pos: input, n: 5)); |
853 | input += 5; |
854 | } else { |
855 | // copy "\0" unmodified |
856 | output.append(v: patternView.mid(pos: input, n: 2)); |
857 | input += 2; |
858 | } |
859 | stillMultiLine = true; |
860 | break; |
861 | |
862 | case L's': |
863 | // replace "\s" with "[ \t]" |
864 | output.append(s: QLatin1String("[ \\t]" )); |
865 | input += 2; |
866 | break; |
867 | |
868 | case L'n': |
869 | stillMultiLine = true; |
870 | // FALLTROUGH |
871 | Q_FALLTHROUGH(); |
872 | default: |
873 | // copy "\?" unmodified |
874 | output.append(v: patternView.mid(pos: input, n: 2)); |
875 | input += 2; |
876 | } |
877 | break; |
878 | |
879 | case L'[': |
880 | // copy "[" unmodified |
881 | insideClass = true; |
882 | output.append(c: pattern[input]); |
883 | ++input; |
884 | break; |
885 | |
886 | default: |
887 | // copy "?" unmodified |
888 | output.append(c: pattern[input]); |
889 | ++input; |
890 | } |
891 | } |
892 | } |
893 | return output; |
894 | } |
895 | |
896 | // Kill our helpers again |
897 | #ifdef FAST_DEBUG_ENABLE |
898 | #undef FAST_DEBUG_ENABLE |
899 | #endif |
900 | #undef FAST_DEBUG |
901 | |